Skip to content

Commit 2f3dd09

Browse files
author
Gordon B. McMillan
committed
Optimize Class.find so that the propspec can contain a set of ids to match.
This is used by indexer.search so that it needs only a single find call for all the index matches. The code was already confusing, but for common terms (lots of index matches) the new approach is enormously faster.
1 parent b00c0d5 commit 2f3dd09

File tree

2 files changed

+84
-32
lines changed

2 files changed

+84
-32
lines changed

roundup/hyperdb.py

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
# $Id: hyperdb.py,v 1.71 2002-07-09 03:02:52 richard Exp $
18+
# $Id: hyperdb.py,v 1.72 2002-07-09 21:53:38 gmcm Exp $
1919

2020
__doc__ = """
2121
Hyperdatabase implementation, especially field types.
@@ -785,24 +785,28 @@ def lookup(self, keyvalue):
785785

786786
# XXX: change from spec - allows multiple props to match
787787
def find(self, **propspec):
788-
"""Get the ids of nodes in this class which link to a given node.
788+
"""Get the ids of nodes in this class which link to the given nodes.
789789
790-
'propspec' consists of keyword args propname=nodeid
790+
'propspec' consists of keyword args propname={nodeid:1,}
791791
'propname' must be the name of a property in this class, or a
792792
KeyError is raised. That property must be a Link or Multilink
793793
property, or a TypeError is raised.
794794
795-
'nodeid' must be the id of an existing node in the class linked
796-
to by the given property, or an IndexError is raised.
795+
Any node in this class whose 'propname' property links to any of the
796+
nodeids will be returned. Used by the full text indexing, which knows
797+
that "foo" occurs in msg1, msg3 and file7, so we have hits on these issues:
798+
db.issue.find(messages={'1':1,'3':1}, files={'7':1})
797799
"""
798800
propspec = propspec.items()
799-
for propname, nodeid in propspec:
801+
for propname, nodeids in propspec:
800802
# check the prop is OK
801803
prop = self.properties[propname]
802804
if not isinstance(prop, Link) and not isinstance(prop, Multilink):
803805
raise TypeError, "'%s' not a Link/Multilink property"%propname
804-
if not self.db.hasnode(prop.classname, nodeid):
805-
raise ValueError, '%s has no node %s'%(prop.classname, nodeid)
806+
#XXX edit is expensive and of questionable use
807+
#for nodeid in nodeids:
808+
# if not self.db.hasnode(prop.classname, nodeid):
809+
# raise ValueError, '%s has no node %s'%(prop.classname, nodeid)
806810

807811
# ok, now do the find
808812
cldb = self.db.getclassdb(self.classname)
@@ -811,16 +815,26 @@ def find(self, **propspec):
811815
node = self.db.getnode(self.classname, id, db=cldb)
812816
if node.has_key(self.db.RETIRED_FLAG):
813817
continue
814-
for propname, nodeid in propspec:
818+
for propname, nodeids in propspec:
815819
# can't test if the node doesn't have this property
816820
if not node.has_key(propname):
817821
continue
822+
if type(nodeids) is type(''):
823+
nodeids = {nodeids:1}
818824
prop = self.properties[propname]
819-
property = node[propname]
820-
if isinstance(prop, Link) and nodeid == property:
821-
l.append(id)
822-
elif isinstance(prop, Multilink) and nodeid in property:
825+
value = node[propname]
826+
if isinstance(prop, Link) and nodeids.has_key(value):
823827
l.append(id)
828+
break
829+
elif isinstance(prop, Multilink):
830+
hit = 0
831+
for v in value:
832+
if nodeids.has_key(v):
833+
l.append(id)
834+
hit = 1
835+
break
836+
if hit:
837+
break
824838
return l
825839

826840
def stringFind(self, **requirements):
@@ -1185,6 +1199,22 @@ def Choice(name, db, *options):
11851199

11861200
#
11871201
# $Log: not supported by cvs2svn $
1202+
# Revision 1.71 2002/07/09 03:02:52 richard
1203+
# More indexer work:
1204+
# - all String properties may now be indexed too. Currently there's a bit of
1205+
# "issue" specific code in the actual searching which needs to be
1206+
# addressed. In a nutshell:
1207+
# + pass 'indexme="yes"' as a String() property initialisation arg, eg:
1208+
# file = FileClass(db, "file", name=String(), type=String(),
1209+
# comment=String(indexme="yes"))
1210+
# + the comment will then be indexed and be searchable, with the results
1211+
# related back to the issue that the file is linked to
1212+
# - as a result of this work, the FileClass has a default MIME type that may
1213+
# be overridden in a subclass, or by the use of a "type" property as is
1214+
# done in the default templates.
1215+
# - the regeneration of the indexes (if necessary) is done once the schema is
1216+
# set up in the dbinit.
1217+
#
11881218
# Revision 1.70 2002/06/27 12:06:20 gmcm
11891219
# Improve an error message.
11901220
#

roundup/indexer.py

Lines changed: 41 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,14 @@
1414
# that promote freedom, but obviously am giving up any rights
1515
# to compel such.
1616
#
17-
#$Id: indexer.py,v 1.7 2002-07-09 21:38:43 richard Exp $
17+
#$Id: indexer.py,v 1.8 2002-07-09 21:53:38 gmcm Exp $
1818
'''
1919
This module provides an indexer class, RoundupIndexer, that stores text
2020
indices in a roundup instance. This class makes searching the content of
21-
messages and text files possible.
21+
messages, string properties and text files possible.
2222
'''
2323
import os, shutil, re, mimetypes, marshal, zlib, errno
24+
from hyperdb import Link, Multilink
2425

2526
class Indexer:
2627
''' Indexes information from roundup's hyperdb to allow efficient
@@ -30,6 +31,7 @@ class Indexer:
3031
files {identifier: (fileid, wordcount)}
3132
words {word: {fileid: count}}
3233
fileids {fileid: identifier}
34+
where identifier is (classname, nodeid, propertyname)
3335
'''
3436
def __init__(self, db_path):
3537
self.indexdb_path = os.path.join(db_path, 'indexes')
@@ -139,12 +141,18 @@ def search(self, search_terms, klass, ignore={},
139141
if not hits:
140142
return {}
141143

142-
# this is specific to "issue" klass ... eugh
143-
designator_propname = {'msg': 'messages', 'file': 'files'}
144+
#designator_propname = {'msg': 'messages', 'file': 'files'}
145+
designator_propname = {}
146+
for nm, propclass in klass.getprops().items():
147+
if isinstance(propclass, Link) or isinstance(propclass, Multilink):
148+
designator_propname[propclass.classname] = nm
144149

145150
# build a dictionary of nodes and their associated messages
146151
# and files
147-
nodeids = {}
152+
nodeids = {} # this is the answer
153+
propspec = {} # used to do the klass.find
154+
for propname in designator_propname.values():
155+
propspec[propname] = {} # used as a set (value doesn't matter)
148156
for classname, nodeid, property in hits.values():
149157
# skip this result if we don't care about this class/property
150158
if ignore.has_key((classname, property)):
@@ -156,20 +164,30 @@ def search(self, search_terms, klass, ignore={},
156164
nodeids[nodeid] = {}
157165
continue
158166

159-
# it's a linked class - find the klass entries that are
160-
# linked to it
161-
linkprop = designator_propname[classname]
162-
for resid in klass.find(**{linkprop: nodeid}):
163-
resid = str(resid)
164-
if not nodeids.has_key(id):
165-
nodeids[resid] = {}
166-
167-
# update the links for this klass nodeid
168-
node_dict = nodeids[resid]
169-
if not node_dict.has_key(linkprop):
170-
node_dict[linkprop] = [nodeid]
171-
elif node_dict.has_key(linkprop):
172-
node_dict[linkprop].append(nodeid)
167+
# it's a linked class - set up to do the klass.find
168+
linkprop = designator_propname[classname] # eg, msg -> messages
169+
propspec[linkprop][nodeid] = 1
170+
171+
# retain only the meaningful entries
172+
for propname, idset in propspec.items():
173+
if not idset:
174+
del propspec[propname]
175+
176+
# klass.find tells me the klass nodeids the linked nodes relate to
177+
for resid in klass.find(**propspec):
178+
resid = str(resid)
179+
if not nodeids.has_key(id):
180+
nodeids[resid] = {}
181+
node_dict = nodeids[resid]
182+
# now figure out where it came from
183+
for linkprop in propspec.keys():
184+
for nodeid in klass.get(resid, linkprop):
185+
if propspec[linkprop].has_key(nodeid):
186+
# OK, this node[propname] has a winner
187+
if not node_dict.has_key(linkprop):
188+
node_dict[linkprop] = [nodeid]
189+
else:
190+
node_dict[linkprop].append(nodeid)
173191
return nodeids
174192

175193
# we override this to ignore not 2 < word < 25 and also to fix a bug -
@@ -311,6 +329,10 @@ def index_loaded(self):
311329

312330
#
313331
#$Log: not supported by cvs2svn $
332+
#Revision 1.7 2002/07/09 21:38:43 richard
333+
#Only save the index if the thing is loaded and changed. Also, don't load
334+
#the index just for a save.
335+
#
314336
#Revision 1.6 2002/07/09 04:26:44 richard
315337
#We're indexing numbers now, and _underscore words
316338
#

0 commit comments

Comments
 (0)