Skip to content

Commit feea037

Browse files
committed
Work in Progress: sc2reader rewrite.
This changeset represents a significant restructuring that should make options easier to handle, improve debugging, and provide a highly literate base of source code for understanding the replay format and the intricacies of parsing it into a workable object tree. TODO: use SC2ReaderError exceptions, comment all files, finish processors.
1 parent c5e8891 commit feea037

File tree

7 files changed

+664
-315
lines changed

7 files changed

+664
-315
lines changed

sc2reader/__init__.py

Lines changed: 155 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,70 +1,173 @@
1-
import os, copy
1+
# -*- coding: utf-8 -*-
2+
"""
3+
sc2reader
4+
----------
25
3-
import mpyq
6+
A Starcraft II replay parsing library intended to promote innovation in
7+
Starcraft tools and communities. Eventually, it will cover all official
8+
releases and dump easily to JSON for inter-language portability.
49
5-
import config
6-
from objects import Replay
7-
from utils import ReplayBuffer, AttributeDict
10+
sc2reader has been carefully written for clarity to serve as a starting
11+
point for those who want to write their own parsers, potentially in other
12+
languages for native access.
813
9-
__version__ = "0.3.0"
10-
__author__ = "Graylin Kim <[email protected]>"
14+
Enjoy.
1115
16+
:copyright: (c) 2011 Graylin Kim
17+
:license: MIT, See LICENSE.txt for details
18+
"""
1219

13-
class SC2Reader(object):
14-
def __init__(self, **options):
15-
#Set Defaults before configuring with user options
16-
self.options = AttributeDict(
17-
directory="",
18-
processors=[],
19-
debug=False,
20-
verbose=False,
21-
parse_events=True)
22-
self.configure(**options)
20+
__version__ = '0.3-dev'
2321

24-
def read(self, location):
25-
if self.options.directory:
26-
location = os.path.join(self.options.directory,location)
22+
#System imports
23+
import os
2724

28-
if self.options.verbose: print "Reading: %s" % location
25+
#PyPi imports
26+
import mpyq
2927

30-
if os.path.isdir(location):
31-
#SC2Reader::read each subfile/directory and combine the lists
32-
read = lambda file: self.read(os.path.join(location,file))
33-
tolist = lambda x: [x] if isinstance(x,Replay) else x
34-
return sum(map(tolist,(read(x) for x in os.listdir(location))),[])
28+
#Package imports
29+
import config
30+
import objects
31+
import utils
3532

36-
with open(location) as replay_file:
37-
replay = Replay(replay_file,**self.options.copy())
38-
archive = mpyq.MPQArchive(location,listfile=False)
3933

40-
for file in self.files:
41-
buffer = ReplayBuffer(archive.read_file(file))
42-
read = config.readers[replay.build][file]
43-
read(buffer,replay)
34+
class Reader(object):
35+
""" The Reader class acts as a factory class for replay objects. The
36+
class accepts a key=value list of options to override defaults (see
37+
config.py) and exposes a very simple read/configure interface and
38+
orchestrates the replay build process.
39+
"""
4440

45-
#Handle user processors after internal processors
46-
for process in self.processors+self.options.processors:
47-
replay = process(replay)
4841

49-
return replay
42+
def __init__(self, **user_options):
43+
""" The constructor makes a copy of the default_options to make sure the
44+
option configuration doesn't propagate back to the default_options.
45+
It should support any arbitrary number of different Reader objects.
46+
"""
47+
self.options = config.default_options.copy()
48+
self.configure(**user_options)
5049

5150
def configure(self,**options):
5251
self.options.update(options)
5352

54-
#Update system configuration
55-
myconfig = config.full if self.options.parse_events else config.partial
56-
self.files = myconfig.files
57-
self.processors = myconfig.processors
53+
# Depending on the options chosen, the system needs to update related
54+
# options and settings in order to get the reading right.
55+
self.sys = config.full if self.options.parse_events else config.partial
56+
57+
def read(self, location, **user_options):
58+
""" Read indicated file or recursively read matching files from the
59+
specified directory. Returns a replay or a list of replays depending
60+
on the context.
61+
"""
62+
63+
# Base the options off a copy to leave the Reader options unaffected.
64+
options = self.options.copy()
65+
options.update(user_options)
66+
67+
# The directory option allows users to specify file locations relative
68+
# to a location other than the present working directory by joining the
69+
# location with the directory of their choice.
70+
if options.directory:
71+
location = os.path.join(options.directory,location)
72+
73+
# When passed a directory as the location, the Reader recursively builds
74+
# a list of replays to return using the utils.get_files function. This
75+
# function respects the following arguments:
76+
# * depth: The maximum depth to traverse. Defaults to unlimited (-1)
77+
# * follow_symlinks: Boolean for following symlinks. Defaults to True
78+
# * exclude_dirs: A list of directory names to skip while recursing
79+
# * include_regex: A regular expression rule which all returned file
80+
# names must match. Defaults to None
81+
#
82+
replays, files = list(), utils.get_files(location,**options)
83+
84+
# If no files are found, it could be for a variety of reasons;
85+
# raise a NoMatchingFilesError to alert the caller to the situation.
86+
if not files:
87+
raise NoMatchingFilesError()
88+
89+
for location in files:
90+
if options.verbose: print "Reading: %s" % location
91+
92+
with open(location) as replay_file:
93+
# The Replay constructor scans the header of the replay file for
94+
# the build number and stores the options for later use. The
95+
# options are copied so subsequent option changes are isolated.
96+
replay = objects.Replay(replay_file,**options.copy())
97+
98+
# .SC2Replay files are written in Blizzard's MPQ Archive format.
99+
# The format stores a header which contains a block table that
100+
# specifies the location of each encrypted file.
101+
#
102+
# Unfortunately, some replay sites modify the replay contents to
103+
# add messages promoting their sites without updating the header
104+
# correctly. The listfile option(hack) lets us bypass this issue
105+
# by specifying the files we want instead of generating a list.
106+
archive = mpyq.MPQArchive(location,listfile=False)
107+
108+
# These files are configured for either full or partial parsing
109+
for file in self.sys.files:
110+
111+
# For each file, we build a smart buffer object from the
112+
# utf-8 encoded bitstream that mpyq extracts.
113+
buffer = utils.ReplayBuffer(archive.read_file(file))
114+
115+
# Each version of Starcraft slightly modifies some portions
116+
# of the format for some files. To work with this, the
117+
# config file has a nested lookup structure of
118+
# [build][file]=>reader which returns the appropriate reader
119+
#
120+
# TODO: Different versions also have different data mappings
121+
# sc2reader doesn't yet handle this difficulty.
122+
#
123+
# Readers use the type agnostic __call__ interface so that
124+
# they can be implemented as functions or classes as needed.
125+
#
126+
# Readers store the extracted information into the replay
127+
# object for post processing because correct interpretation
128+
# of the information often requires data from other files.
129+
config.readers[replay.build][file].__call__(buffer,replay)
130+
131+
# Now that the replay has been loaded with the "raw" data from
132+
# the archive files we run the system level post processors to
133+
# organize the data into a cross referenced data structure.
134+
#
135+
# After system level processors have run, call each of the post
136+
# processors provided by the user. This would be a good place to
137+
# convert the object to a serialized json string for cross
138+
# language processes or add custom attributes.
139+
#
140+
# TODO: Maybe we should switch this to a hook based architecture
141+
# Needs to be able to load "contrib" type processors..
142+
for process in self.sys.processors+self.options.processors:
143+
replay = process(replay)
144+
145+
replays.append(replay)
146+
147+
return replays
148+
149+
def read_file(file,**options):
150+
replays = self.read(file,**options)
151+
152+
# While normal usage would suggest passing in only filenames, it is
153+
# possible that directories could be passed in. Don't fail silently!
154+
if len(replays) > 1:
155+
raise MultipleMatchError(replays)
156+
157+
# Propagate the replay in a singular context
158+
return replays[0] if len(replays) > 0 else None
159+
160+
"""sc2reader uses a default Reader class instance to provide a package level
161+
interface to its functionality. The package level interface presents the same
162+
functional interface; it just saves the hassle of creating the class object.
163+
"""
164+
__defaultReader = Reader()
165+
166+
def read(location, **user_options):
167+
return __defaultReader.read(location,**user_options)
58168

59-
60-
'''Package Level Interface'''
61-
__defaultSC2Reader = SC2Reader()
62-
63-
#Allow options on the package level read for one off reads.
64-
def read(location, **options):
65-
reader = SC2Reader(**options) if options else __defaultSC2Reader
66-
return reader.read(location)
67-
68-
#Allow package level configuration for lazy people
69169
def configure(**options):
70-
__defaultSC2Reader.configure(**options)
170+
config.default_options.update(options)
171+
172+
def reset():
173+
__defaultReader = Reader()

sc2reader/config.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,19 @@
1-
from utils import AttributeDict
1+
from .processors import *
2+
from .readers import *
3+
from .utils import AttributeDict
24

3-
from processors import *
4-
from readers import *
5+
default_options = AttributeDict(
6+
directory="",
7+
processors=[],
8+
debug=False,
9+
verbose=False,
10+
parse_events=True,
11+
include_regex=None,
12+
exclude_dirs=[],
13+
recursive=True,
14+
depth=-1,
15+
follow_symlinks=True
16+
)
517

618
full = AttributeDict(
719
files = [
@@ -11,14 +23,16 @@
1123
'replay.message.events',
1224
'replay.game.events'],
1325
processors = [
14-
PeopleProcessor,
15-
AttributeProcessor,
16-
TeamsProcessor,
17-
MessageProcessor,
18-
RecorderProcessor,
19-
EventProcessor,
20-
ApmProcessor,
21-
ResultsProcessor],
26+
FullProcessor,
27+
#PeopleProcessor,
28+
#AttributeProcessor,
29+
#TeamsProcessor,
30+
#MessageProcessor,
31+
#RecorderProcessor,
32+
#EventProcessor,
33+
#ApmProcessor,
34+
#ResultsProcessor
35+
],
2236
)
2337

2438
partial = AttributeDict(

0 commit comments

Comments
 (0)