Skip to content

Commit f01fcdc

Browse files
author
Gordon B. McMillan
committed
An indexer that uses Metakit for storage.
Undo the dynamic re-open madness (now always RW). Various bug fixes.
1 parent a966edf commit f01fcdc

File tree

1 file changed

+145
-69
lines changed

1 file changed

+145
-69
lines changed

roundup/backends/back_metakit.py

Lines changed: 145 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,24 @@
11
from roundup import hyperdb, date, password, roundupdb
22
import metakit
33
import re, marshal, os, sys, weakref, time, calendar
4-
from roundup.indexer import Indexer
4+
from roundup import indexer
55

6-
_instances = weakref.WeakValueDictionary()
7-
8-
def Database(config, journaltag=None):
9-
if _instances.has_key(id(config)):
10-
db = _instances[id(config)]
11-
old = db.journaltag
12-
db.journaltag = journaltag
13-
try:
14-
delattr(db, 'curuserid')
15-
except AttributeError:
16-
pass
17-
return db
18-
else:
19-
db = _Database(config, journaltag)
20-
_instances[id(config)] = db
21-
return db
22-
23-
class _Database(hyperdb.Database):
6+
class Database(hyperdb.Database):
247
def __init__(self, config, journaltag=None):
258
self.config = config
269
self.journaltag = journaltag
2710
self.classes = {}
2811
self._classes = []
2912
self.dirty = 0
30-
self.__RW = 0
3113
self._db = self.__open()
32-
self.indexer = Indexer(self.config.DATABASE)
14+
self.indexer = Indexer(self.config.DATABASE, self._db)
3315
os.umask(0002)
3416
def post_init(self):
3517
if self.indexer.should_reindex():
3618
self.reindex()
3719

3820
def reindex(self):
21+
print "Reindexing!!!"
3922
for klass in self.classes.values():
4023
for nodeid in klass.list():
4124
klass.index(nodeid)
@@ -59,13 +42,10 @@ def getclasses(self):
5942
# --- exposed methods
6043
def commit(self):
6144
if self.dirty:
62-
if self.__RW:
63-
self._db.commit()
64-
for cl in self.classes.values():
65-
cl._commit()
66-
self.indexer.save_index()
67-
else:
68-
raise RuntimeError, "metakit is open RO"
45+
self._db.commit()
46+
for cl in self.classes.values():
47+
cl._commit()
48+
self.indexer.save_index()
6949
self.dirty = 0
7050
def rollback(self):
7151
if self.dirty:
@@ -82,6 +62,8 @@ def pack(self, pack_before):
8262
pass
8363
def addclass(self, cl):
8464
self.classes[cl.classname] = cl
65+
if self.tables.find(name=cl.classname) < 0:
66+
self.tables.append(name=cl.classname)
8567
def addjournal(self, tablenm, nodeid, action, params):
8668
tblid = self.tables.find(name=tablenm)
8769
if tblid == -1:
@@ -114,22 +96,12 @@ def gethistory(self, tablenm, nodeid):
11496
return rslt
11597

11698
def close(self):
117-
import time
118-
now = time.time
119-
start = now()
12099
for cl in self.classes.values():
121100
cl.db = None
122-
#self._db.rollback()
123-
#print "pre-close cleanup of DB(%d) took %2.2f secs" % (self.__RW, now()-start)
124101
self._db = None
125-
#print "close of DB(%d) took %2.2f secs" % (self.__RW, now()-start)
126102
self.classes = {}
127-
try:
128-
del _instances[id(self.config)]
129-
except KeyError:
130-
pass
131-
self.__RW = 0
132-
103+
self.indexer = None
104+
133105
# --- internal
134106
def __open(self):
135107
self.dbnm = db = os.path.join(self.config.DATABASE, 'tracker.mk4')
@@ -147,11 +119,7 @@ def __open(self):
147119
else:
148120
# can't find schemamod - must be frozen
149121
self.fastopen = 1
150-
else:
151-
self.__RW = 1
152-
if not self.fastopen:
153-
self.__RW = 1
154-
db = metakit.storage(db, self.__RW)
122+
db = metakit.storage(db, 1)
155123
hist = db.view('history')
156124
tables = db.view('tables')
157125
if not self.fastopen:
@@ -162,21 +130,7 @@ def __open(self):
162130
self.tables = tables
163131
self.hist = hist
164132
return db
165-
def isReadOnly(self):
166-
return self.__RW == 0
167-
def getWriteAccess(self):
168-
if self.journaltag is not None and self.__RW == 0:
169-
#now = time.time
170-
#start = now()
171-
self._db = None
172-
#print "closing the file took %2.2f secs" % (now()-start)
173-
#start = now()
174-
self._db = metakit.storage(self.dbnm, 1)
175-
self.__RW = 1
176-
self.hist = self._db.view('history')
177-
self.tables = self._db.view('tables')
178-
#print "getting RW access took %2.2f secs" % (now()-start)
179-
133+
180134
_STRINGTYPE = type('')
181135
_LISTTYPE = type([])
182136
_CREATE, _SET, _RETIRE, _LINK, _UNLINK = range(5)
@@ -247,6 +201,7 @@ def react(self, event, detector):
247201
self.reactors[event].append(detector)
248202
# --- the hyperdb.Class methods
249203
def create(self, **propvalues):
204+
self.fireAuditors('create', None, propvalues)
250205
rowdict = {}
251206
rowdict['id'] = newid = self.maxid
252207
self.maxid += 1
@@ -292,6 +247,8 @@ def set(self, nodeid, **propvalues):
292247
if propvalues.has_key('#ISNEW'):
293248
isnew = 1
294249
del propvalues['#ISNEW']
250+
if not isnew:
251+
self.fireAuditors('set', nodeid, propvalues)
295252
if not propvalues:
296253
return
297254
if propvalues.has_key('id'):
@@ -484,10 +441,13 @@ def set(self, nodeid, **propvalues):
484441
if self.do_journal:
485442
if isnew:
486443
self.db.addjournal(self.classname, nodeid, _CREATE, {})
444+
self.fireReactors('create', nodeid, None)
487445
else:
488446
self.db.addjournal(self.classname, nodeid, _SET, changes)
447+
self.fireReactors('set', nodeid, oldnode)
489448

490449
def retire(self, nodeid):
450+
self.fireAuditors('retire', nodeid, None)
491451
view = self.getview(1)
492452
ndx = view.find(id=int(nodeid))
493453
if ndx < 0:
@@ -503,6 +463,7 @@ def retire(self, nodeid):
503463
if ndx > -1:
504464
iv.delete(ndx)
505465
self.db.dirty = 1
466+
self.fireReactors('retire', nodeid, None)
506467
def history(self, nodeid):
507468
if not self.do_journal:
508469
raise ValueError, 'Journalling is disabled for this class'
@@ -518,11 +479,9 @@ def setkey(self, propname):
518479
if self.db.fastopen and iv.structure():
519480
return
520481
# very first setkey ever
521-
self.db.getWriteAccess()
522482
self.db.dirty = 1
523483
iv = self.db._db.getas('_%s[k:S,i:I]' % self.classname)
524484
iv = iv.ordered(1)
525-
#XXX
526485
# print "setkey building index"
527486
for row in self.getview():
528487
iv.append(k=getattr(row, propname), i=row.id)
@@ -543,6 +502,20 @@ def lookup(self, keyvalue):
543502
if ndx > -1:
544503
return str(view[ndx].id)
545504
raise KeyError, keyvalue
505+
506+
def destroy(self, keyvalue):
    """Physically remove the node whose key property equals *keyvalue*.

    The key is looked up in this class's key index view.  When it is
    found, the index entry and the matching row of the class view are
    both deleted and the database is flagged dirty.  An unknown key, or
    an empty key index, is a silent no-op.
    """
    #TODO clean this up once Richard's said how it should work
    iv = self.getindexview()
    if not iv:
        return
    ndx = iv.find(k=keyvalue)
    if ndx < 0:
        # nothing indexed under this key, so there is no node id to
        # remove from the class view either
        return
    nodeid = iv[ndx].i
    iv.delete(ndx)
    view = self.getview()
    ndx = view.find(id=nodeid)
    if ndx > -1:
        view.delete(ndx)
    # Database.commit() only writes when the dirty flag is set
    self.db.dirty = 1
518+
546519
def find(self, **propspec):
547520
"""Get the ids of nodes in this class which link to the given nodes.
548521
@@ -615,7 +588,6 @@ def addprop(self, **properties):
615588
if self.ruprops.has_key(key):
616589
raise ValueError, "%s is already a property of %s" % (key, self.classname)
617590
self.ruprops.update(properties)
618-
self.db.getWriteAccess()
619591
self.db.fastopen = 0
620592
view = self.__getview()
621593
self.db.commit()
@@ -865,7 +837,6 @@ def __getview(self):
865837
return view.ordered(1)
866838
# need to create or restructure the mk view
867839
# id comes first, so MK will order it for us
868-
self.db.getWriteAccess()
869840
self.db.dirty = 1
870841
s = ["%s[id:I" % self.classname]
871842
for nm, rutyp in self.ruprops.items():
@@ -878,12 +849,8 @@ def __getview(self):
878849
self.db.commit()
879850
return v.ordered(1)
880851
def getview(self, RW=0):
881-
if RW and self.db.isReadOnly():
882-
self.db.getWriteAccess()
883852
return self.db._db.view(self.classname).ordered(1)
884853
def getindexview(self, RW=0):
885-
if RW and self.db.isReadOnly():
886-
self.db.getWriteAccess()
887854
return self.db._db.view("_%s" % self.classname).ordered(1)
888855

889856
def _fetchML(sv):
@@ -912,6 +879,7 @@ def _fetchDate(n):
912879
hyperdb.Password : _fetchPW,
913880
hyperdb.Boolean : lambda n: n,
914881
hyperdb.Number : lambda n: n,
882+
hyperdb.String : str,
915883
}
916884

917885
class FileName(hyperdb.String):
@@ -966,7 +934,7 @@ def create(self, **propvalues):
966934
mimetype = propvalues.get('type', self.default_mime_type)
967935
self.db.indexer.add_text((self.classname, newid, 'content'), content, mimetype)
968936
def undo(fnm=nm, action1=os.remove, indexer=self.db.indexer):
969-
remove(fnm)
937+
action1(fnm)
970938
self.rollbackaction(undo)
971939
return newid
972940
def index(self, nodeid):
@@ -995,4 +963,112 @@ def __init__(self, db, classname, **properties):
995963
if not properties.has_key('superseder'):
996964
properties['superseder'] = hyperdb.Multilink(classname)
997965
Class.__init__(self, db, classname, **properties)
966+
967+
CURVERSION = 1
998968

969+
class Indexer(indexer.Indexer):
    """Full-text indexer that keeps its word index in a Metakit storage.

    The index is stored in ``index.mk4`` under the given path and uses
    three views:

    * ``version[vers:I]`` -- the on-disk format version (one row);
    * ``ids[tblid:I,nodeid:I,propid:I]`` -- one row per indexed
      (class, node, property); the row number is the "position" that
      word hits point at;
    * ``index[word:S,hits[pos:I]]`` -- one row per upper-cased word with
      a sub-view of positions into ``ids``.
    """

    # words that are never indexed (compared after upper-casing)
    disallows = {'THE':1, 'THIS':1, 'ZZZ':1, 'THAT':1, 'WITH':1}

    def __init__(self, path, datadb):
        """Open the index storage under *path*, creating it if needed.

        *datadb* is the tracker's own Metakit storage; it is consulted
        for the ``tables`` view and for each class's column layout.
        """
        self.db = metakit.storage(os.path.join(path, 'index.mk4'), 1)
        self.datadb = datadb
        self.reindex = 0
        v = self.db.view('version')
        if not v.structure():
            # brand new index file: define the version view first
            v = self.db.getas('version[vers:I]')
            self.db.commit()
        if len(v) == 0:
            # no version row yet (new file, or the structure was committed
            # but the row never was) -- stamp it and rebuild the index
            v.append(vers=CURVERSION)
            self.reindex = 1
        elif v[0].vers != CURVERSION:
            v[0].vers = CURVERSION
            self.reindex = 1
        if self.reindex:
            self.db.getas('ids[tblid:I,nodeid:I,propid:I]')
            self.db.getas('index[word:S,hits[pos:I]]')
            self.db.commit()
        self.changed = 0
        # classname -> list of column names, filled lazily by _getprops
        self.propcache = {}

    def force_reindex(self):
        """Empty the ``ids`` and ``index`` views and request a reindex."""
        v = self.db.view('ids')
        v[:] = []
        v = self.db.view('index')
        v[:] = []
        self.db.commit()
        self.reindex = 1

    def should_reindex(self):
        """Return a true value when the index needs to be rebuilt."""
        return self.reindex

    def _getprops(self, classname):
        """Return the (cached) list of column names of *classname*'s view."""
        props = self.propcache.get(classname, None)
        if props is None:
            props = self.datadb.view(classname).structure()
            props = [prop.name for prop in props]
            self.propcache[classname] = props
        return props

    def _getpropid(self, classname, propname):
        """Map a property name to its column position (ValueError if absent)."""
        return self._getprops(classname).index(propname)

    def _getpropname(self, classname, propid):
        """Map a column position back to the property name."""
        return self._getprops(classname)[propid]

    def add_text(self, identifier, text, mime_type='text/plain'):
        """Index the words of *text* under *identifier*.

        *identifier* is a ``(classname, nodeid, propname)`` tuple.  Only
        ``text/plain`` content is indexed; anything else is ignored.
        Words are 3-25 word characters long, upper-cased, and skipped
        when listed in ``disallows``.

        Raises KeyError when *classname* is not in the data storage's
        ``tables`` view.
        """
        if mime_type != 'text/plain':
            return
        classname, nodeid, propname = identifier
        tbls = self.datadb.view('tables')
        tblid = tbls.find(name=classname)
        if tblid < 0:
            raise KeyError("unknown class %r"%classname)
        nodeid = int(nodeid)
        propid = self._getpropid(classname, propname)
        pos = self.db.view('ids').append(tblid=tblid,nodeid=nodeid,propid=propid)
        # the ids view has been modified even if no word ends up indexed
        self.changed = 1

        # collect the distinct, allowed, upper-cased words
        words = {}
        for word in re.findall(r'\b\w{3,25}\b', text):
            word = word.upper()
            if word not in self.disallows:
                words[word] = 1

        index = self.db.view('index').ordered(1)
        for word in words.keys():
            ndx = index.find(word=word)
            if ndx < 0:
                index.append(word=word)
                # NOTE(review): the view is ordered, so look the row up
                # again rather than trusting append()'s return value as
                # the row's position -- confirm against the Mk4py docs
                ndx = index.find(word=word)
            hits = index[ndx].hits
            if len(hits)==0 or hits.find(pos=pos) < 0:
                hits.append(pos=pos)

    def find(self, wordlist):
        """Return the entries that contain ALL the words in *wordlist*.

        Words are upper-cased before lookup (the index stores upper-case
        words); words shorter than 3 or longer than 25 characters are
        ignored.  The result is a dict mapping a running integer to
        ``(classname, nodeid, propname)``; it is empty when nothing
        matches or when no usable word was given.
        """
        hits = None
        index = self.db.view('index').ordered(1)
        for word in wordlist:
            if not 2 < len(word) < 26:
                # outside the bounds of what add_text indexes
                continue
            ndx = index.find(word=word.upper())
            if ndx < 0:
                return {}
            if hits is None:
                hits = index[ndx].hits
            else:
                hits = hits.intersect(index[ndx].hits)
            if len(hits) == 0:
                return {}
        if hits is None:
            return {}
        rslt = {}
        ids = self.db.view('ids').remapwith(hits)
        tbls = self.datadb.view('tables')
        for i in range(len(ids)):
            hit = ids[i]
            classname = tbls[hit.tblid].name
            nodeid = str(hit.nodeid)
            propname = self._getpropname(classname, hit.propid)
            rslt[i] = (classname, nodeid, propname)
        return rslt

    def save_index(self):
        """Commit the index storage if anything changed since the last save."""
        if self.changed:
            self.db.commit()
        self.changed = 0
1071+
1072+
1073+
1074+

0 commit comments

Comments
 (0)