Skip to content

Commit 09bbc04

Browse files
committed
Adds CachedSC2Factories.
These factories perform basic caching of remote resources with minimal configuration. There are currently three kinds:

* DictCachedSC2Factory - Caches results in memory with a dict. Accepts an optional `cache_max_size` argument which naively limits the number of resources cached in memory. The dict size is maintained by deleting the least recently used entry at insert time when the cache is full. Resource sizes can vary wildly; this factory does nothing to adjust for that.
* FileCachedSC2Factory - Caches results on disk in files. Requires a pre-existing `cache_dir` to write the files into.
* DoubleCachedSC2Factory - Uses both of the above caching layers to reduce IO time for commonly used resources. Order of operations:
  1. Try to load from the DictCache and return.
  2. Try to load from the FileCache, write to the DictCache, and return.
  3. Load the remote resource, write to both caches, and return.
  Requires `cache_dir` and accepts an optional `cache_max_size` to limit the DictCache size.

These caching schemes can be enabled painlessly by calling:

* sc2reader.useDictCache(cache_max_size=0, **SC2Factory_options)
* sc2reader.useFileCache(cache_dir, **SC2Factory_options)
* sc2reader.useDoubleCache(cache_dir, cache_max_size=0, **SC2Factory_options)

Thanks ggtracker for letting me push these utilities to the public repo.
1 parent 237350b commit 09bbc04

File tree

3 files changed

+143
-40
lines changed

3 files changed

+143
-40
lines changed

sc2reader/__init__.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import absolute_import
22

3+
import sys
4+
35
# import submodules
46
from sc2reader import plugins, data, scripts
57

@@ -11,21 +13,35 @@
1113
# For backwards compatibility
1214
SC2Reader = factories.SC2Factory
1315

14-
# Expose a nice module level interface
15-
__defaultSC2Reader = factories.SC2Factory()
16+
def setFactory(factory):
    """Make ``factory`` the backend of the module-level interface.

    Each loader/configuration method of ``factory`` is re-bound as an
    attribute of this module, so calls such as ``load_replay(...)`` on the
    module delegate to the given factory. The factory itself is stored on
    the module as ``_defaultFactory``.
    """
    this_module = sys.modules[__name__]

    # (module attribute, factory attribute) pairs, bound in this order.
    # The map "history" names are served by the factory's map "header" loaders.
    bindings = (
        ('load_replays', 'load_replays'),
        ('load_replay', 'load_replay'),
        ('load_maps', 'load_maps'),
        ('load_map', 'load_map'),
        ('load_game_summaries', 'load_game_summaries'),
        ('load_game_summary', 'load_game_summary'),
        ('load_map_infos', 'load_map_infos'),
        ('load_map_info', 'load_map_info'),
        ('load_map_histories', 'load_map_headers'),
        ('load_map_history', 'load_map_header'),
        ('configure', 'configure'),
        ('reset', 'reset'),
        ('register_plugin', 'register_plugin'),
    )
    for module_attr, factory_attr in bindings:
        setattr(this_module, module_attr, getattr(factory, factory_attr))

    this_module._defaultFactory = factory
35+
36+
def useFileCache(cache_dir, **options):
    """Switch the module-level interface to an on-disk caching factory.

    :param cache_dir: Directory the cache files are written to; passed
        straight to ``factories.FileCachedSC2Factory``.
    :param options: Remaining keyword options, forwarded to the factory.
    """
    file_cached_factory = factories.FileCachedSC2Factory(cache_dir, **options)
    setFactory(file_cached_factory)
38+
39+
def useDictCache(cache_max_size=0, **options):
    """Switch the module-level interface to an in-memory caching factory.

    :param cache_max_size: Limit on the number of cached resources, passed
        straight to ``factories.DictCachedSC2Factory``.
    :param options: Remaining keyword options, forwarded to the factory.
    """
    dict_cached_factory = factories.DictCachedSC2Factory(cache_max_size, **options)
    setFactory(dict_cached_factory)
41+
42+
def useDoubleCache(cache_dir, cache_max_size=0, **options):
    """Switch the module-level interface to a memory + disk caching factory.

    :param cache_dir: Directory for the on-disk cache layer.
    :param cache_max_size: Limit on the number of resources held by the
        in-memory cache layer.
    :param options: Remaining keyword options, forwarded to the factory.
    """
    double_cached_factory = factories.DoubleCachedSC2Factory(
        cache_dir, cache_max_size, **options)
    setFactory(double_cached_factory)
1644

17-
load_replays = __defaultSC2Reader.load_replays
18-
load_replay = __defaultSC2Reader.load_replay
19-
load_maps = __defaultSC2Reader.load_maps
20-
load_map = __defaultSC2Reader.load_map
21-
load_game_summaries = __defaultSC2Reader.load_game_summaries
22-
load_game_summary = __defaultSC2Reader.load_game_summary
23-
load_map_infos = __defaultSC2Reader.load_map_infos
24-
load_map_info = __defaultSC2Reader.load_map_info
25-
load_map_histories = __defaultSC2Reader.load_map_headers
26-
load_map_history = __defaultSC2Reader.load_map_header
45+
# Bind the module-level load_*/configure/reset/register_plugin names to a
# plain SC2Factory at import time; the use*Cache helpers can re-bind them later.
setFactory(factories.SC2Factory())
2746

28-
configure = __defaultSC2Reader.configure
29-
reset = __defaultSC2Reader.reset
3047

31-
register_plugin = __defaultSC2Reader.register_plugin

sc2reader/factories.py

Lines changed: 103 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -219,28 +219,108 @@ def _load_resource(self, resource, options=None, **new_options):
219219
return (resource, resource_name)
220220

221221

222-
class SC2Cache(SC2Factory):
223-
224-
def __init__(self, **options):
225-
super(SC2Cache, self).__init__(self, **options)
226-
self.cache = IntitializeCache(**options)
227-
228-
def load_map(self, map_file, options=None, **new_options):
229-
options = options or utils.merged_dict(self.options, new_options)
230-
231-
if self.cache.has(map_file):
232-
return self.cache.get(map_file)
222+
import urlparse, time
223+
224+
class CachedSC2Factory(SC2Factory):
    """Base class for factories that cache the contents of remote resources.

    Subclasses provide the storage by implementing :meth:`cache_has`,
    :meth:`cache_get` and :meth:`cache_set`. This class derives the cache
    key from the resource URL and consults the cache before falling back
    to the parent factory's remote load.
    """

    def get_remote_cache_key(self, remote_resource):
        """Return the ``(bucket, key)`` cache key for a remote resource URL.

        The bucket is the URL's network location with everything from the
        first ``:`` onwards removed (which strips the port); the key is the
        URL path without leading or trailing slashes.
        """
        # Strip the port and use the domain as the bucket
        # and use the full path as the key
        parseresult = urlparse.urlparse(remote_resource)
        bucket = re.sub(r':.*', '', parseresult.netloc)
        key = parseresult.path.strip('/')
        return (bucket, key)

    def load_remote_resource_contents(self, remote_resource, **options):
        """Return the contents of ``remote_resource``, going through the cache.

        On a cache hit the cached contents are returned. On a miss the
        contents are loaded by the parent factory, stored in the cache and
        then returned.
        """
        cache_key = self.get_remote_cache_key(remote_resource)
        if self.cache_has(cache_key):
            return self.cache_get(cache_key)

        resource = super(CachedSC2Factory, self).load_remote_resource_contents(remote_resource, **options)
        self.cache_set(cache_key, resource)
        return resource

    def cache_has(self, cache_key):
        """Return whether ``cache_key`` is present in the cache.

        :raises NotImplementedError: Always; subclasses must override.
        """
        # NotImplementedError is the exception class; the NotImplemented
        # constant is not callable and `raise NotImplemented()` would
        # surface as a confusing TypeError instead.
        raise NotImplementedError()

    def cache_get(self, cache_key):
        """Return the cached contents stored under ``cache_key``.

        :raises NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError()

    def cache_set(self, cache_key, value):
        """Store ``value`` in the cache under ``cache_key``.

        :raises NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError()
251+
252+
class FileCachedSC2Factory(CachedSC2Factory):
    """Factory that caches remote resource contents as files on disk.

    Cached files live at ``<cache_dir>/<bucket>/<key>`` where
    ``(bucket, key)`` is the cache key.

    :param cache_dir: Existing directory, readable and writable by the
        current process, that holds the cache files.
    :param options: Remaining keyword options, forwarded to the parent
        factory.
    :raises ValueError: If ``cache_dir`` is not an existing directory or is
        not readable and writable.
    """

    def __init__(self, cache_dir, **options):
        super(FileCachedSC2Factory, self).__init__(**options)
        self.cache_dir = os.path.abspath(cache_dir)
        if not os.path.isdir(self.cache_dir):
            raise ValueError("cache_dir ({}) must be an existing directory.".format(self.cache_dir))
        elif not os.access(self.cache_dir, os.F_OK | os.W_OK | os.R_OK):
            raise ValueError("Must have read/write access to {} for local file caching.".format(self.cache_dir))

    def cache_has(self, cache_key):
        """Return whether a cache file exists for ``cache_key``."""
        return os.path.exists(self.cache_path(cache_key))

    def cache_get(self, cache_key, **options):
        """Load the cached contents for ``cache_key`` from its cache file.

        ``options`` are forwarded to ``load_local_resource_contents``.
        """
        return self.load_local_resource_contents(self.cache_path(cache_key), **options)

    def cache_set(self, cache_key, value):
        """Write ``value`` to the cache file for ``cache_key``.

        Missing intermediate directories are created first.
        """
        cache_path = self.cache_path(cache_key)
        bucket_dir = os.path.dirname(cache_path)
        try:
            os.makedirs(bucket_dir)
        except OSError:
            # An already existing directory (possibly created concurrently
            # by another process) is fine; anything else is a real failure.
            if not os.path.isdir(bucket_dir):
                raise

        # Binary mode: the contents are stored verbatim, without the newline
        # translation that text mode performs on some platforms.
        with open(cache_path, 'wb') as out:
            out.write(value)

    def cache_path(self, cache_key):
        """Return the absolute file path used to cache ``cache_key``.

        :raises ValueError: If the key would resolve to a location outside
            of ``cache_dir`` (e.g. a URL path containing ``..``).
        """
        path = os.path.abspath(os.path.join(self.cache_dir, *cache_key))
        # Joining with '' yields cache_dir with exactly one trailing separator.
        cache_root = os.path.join(self.cache_dir, '')
        if not path.startswith(cache_root):
            # Keys are derived from remote URLs; never read or write outside
            # of the configured cache directory.
            raise ValueError("Cache key {!r} resolves outside of cache_dir ({}).".format(cache_key, self.cache_dir))
        return path
281+
282+
class DictCachedSC2Factory(CachedSC2Factory):
    """Factory that caches remote resource contents in an in-memory dict.

    :param cache_max_size: Maximum number of entries kept in memory. A falsy
        value (the default ``0``) disables the limit. When the cache is full
        the least recently used entry is dropped to make room for a new one.
    :param options: Remaining keyword options, forwarded to the parent
        factory.
    """

    def __init__(self, cache_max_size=0, **options):
        super(DictCachedSC2Factory, self).__init__(**options)
        self.cache_dict = dict()  # cache_key -> cached contents
        self.cache_used = dict()  # cache_key -> time.time() of the last get/set
        self.cache_max_size = cache_max_size

    def cache_set(self, cache_key, value):
        """Store ``value`` under ``cache_key``, evicting one entry if full."""
        # Overwriting an existing key does not grow the cache, so nothing
        # needs to be evicted in that case.
        is_new_key = cache_key not in self.cache_dict
        if self.cache_max_size and is_new_key and len(self.cache_dict) >= self.cache_max_size:
            oldest_cache_key = min(self.cache_used, key=self.cache_used.get)
            del self.cache_used[oldest_cache_key]
            del self.cache_dict[oldest_cache_key]
        self.cache_dict[cache_key] = value
        self.cache_used[cache_key] = time.time()

    def cache_get(self, cache_key):
        """Return the contents cached under ``cache_key``.

        :raises KeyError: If ``cache_key`` is not cached.
        """
        # Look the value up first: recording a use timestamp for a missing
        # key would leave an orphan in cache_used and break later evictions.
        value = self.cache_dict[cache_key]
        self.cache_used[cache_key] = time.time()
        return value

    def cache_has(self, cache_key):
        """Return whether ``cache_key`` is held in memory."""
        return cache_key in self.cache_dict
307+
308+
class DoubleCachedSC2Factory(DictCachedSC2Factory, FileCachedSC2Factory):
    """Factory with an in-memory cache layered on top of an on-disk cache.

    Lookup order for remote resource contents: memory cache, then file
    cache, then the remote source. Whatever is found is stored in every
    layer that missed.

    :param cache_dir: Directory for the on-disk cache layer.
    :param cache_max_size: Limit on the number of entries in the in-memory
        cache layer.
    :param options: Remaining keyword options, forwarded up the chain of
        parent initializers.
    """

    def __init__(self, cache_dir, cache_max_size=0, **options):
        # DictCachedSC2Factory takes cache_max_size positionally and passes
        # the remaining keywords (including cache_dir) on to the next
        # initializer in the MRO, FileCachedSC2Factory.
        super(DoubleCachedSC2Factory, self).__init__(cache_max_size, cache_dir=cache_dir, **options)

    def load_remote_resource_contents(self, remote_resource, **options):
        """Return the contents of ``remote_resource`` using both cache layers."""
        key = self.get_remote_cache_key(remote_resource)

        # Layer 1: memory.
        if DictCachedSC2Factory.cache_has(self, key):
            return DictCachedSC2Factory.cache_get(self, key)

        # Layer 2: disk, falling back to the remote source on a miss.
        if FileCachedSC2Factory.cache_has(self, key):
            contents = FileCachedSC2Factory.cache_get(self, key)
        else:
            contents = SC2Factory.load_remote_resource_contents(self, remote_resource, **options)
            FileCachedSC2Factory.cache_set(self, key, contents)

        # Either way the memory layer missed, so fill it before returning.
        DictCachedSC2Factory.cache_set(self, key, contents)
        return contents

sc2reader/scripts/sc2parse.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55
import sc2reader
66
import traceback
77

8+
# NOTE(review): hard-coded, developer-specific cache directory. The file cache
# factory raises ValueError when cache_dir is not an existing directory, so
# importing this script fails on machines without this path -- confirm whether
# this line was meant to be committed.
sc2reader.useFileCache('/home/graylin/projects/sc2reader/local_cache')
9+
810
def main():
911
for argument in sys.argv[1:]:
1012
for path in sc2reader.utils.get_files(argument):
1113
try:
12-
replay = sc2reader.load_replay(path, debug=True)
14+
replay = sc2reader.load_replay(path, debug=True, verbose=True)
1315
except sc2reader.exceptions.ReadError as e:
1416
print e.replay.filename
1517
print '{build} - {real_type} on {map_name} - Played {start_time}'.format(**e.replay.__dict__)
@@ -23,7 +25,12 @@ def main():
2325
print path
2426
replay = sc2reader.load_replay(path, debug=True, load_level=1)
2527
print '{build} - {real_type} on {map_name} - Played {start_time}'.format(**replay.__dict__)
26-
print '[ERROR]', e
28+
print '[ERROR]', e.message
29+
for pid, attributes in replay.attributes.items():
30+
print pid, attributes
31+
for pid, info in enumerate(replay.raw_data['replay.details'].players):
32+
print pid, info
33+
print replay.raw_data['replay.initData'].player_names
2734
traceback.print_exc()
2835
print
2936

0 commit comments

Comments
 (0)