Skip to content

Commit 1deaed2

Browse files
author
Johannes Gijsbers
committed
Move search method duplicated in indexer_dbm and indexer_tsearch2...
...to indexer_common.
1 parent 8522136 commit 1deaed2

File tree

3 files changed

+103
-148
lines changed

3 files changed

+103
-148
lines changed

roundup/backends/back_tsearch2.py

Lines changed: 26 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from roundup.backends import back_postgresql, tsearch2_setup, indexer_rdbms
1010
from roundup.backends.back_postgresql import db_create, db_nuke, db_command
1111
from roundup.backends.back_postgresql import pg_command, db_exists, Class, IssueClass, FileClass
12+
from roundup.backends.indexer_common import _isLink, Indexer
1213

1314
# XXX: Should probably be on the Class class.
1415
def _indexedProps(spec):
@@ -24,10 +25,6 @@ def _getQueryDict(spec):
2425
query_dict['triggername'] = "%(tablename)s_tsvectorupdate" % query_dict
2526
return query_dict
2627

27-
def _isLink(propclass):
28-
return (isinstance(propclass, hyperdb.Link) or
29-
isinstance(propclass, hyperdb.Multilink))
30-
3128
class Database(back_postgresql.Database):
3229
def __init__(self, config, journaltag=None):
3330
back_postgresql.Database.__init__(self, config, journaltag)
@@ -86,86 +83,16 @@ def determine_all_columns(self, spec):
8683
cols.append(('idxFTI', 'tsvector'))
8784
return cols, mls
8885

89-
class Indexer:
86+
class Indexer(Indexer):
9087
def __init__(self, db):
9188
self.db = db
9289

93-
def force_reindex(self):
94-
pass
95-
90+
# This indexer never needs to reindex.
9691
def should_reindex(self):
97-
pass
98-
99-
def save_index(self):
100-
pass
101-
102-
def add_text(self, identifier, text, mime_type=None):
103-
pass
104-
105-
def close(self):
106-
pass
107-
108-
def search(self, search_terms, klass, ignore={},
109-
dre=re.compile(r'([^\d]+)(\d+)')):
110-
'''Display search results looking for [search, terms] associated
111-
with the hyperdb Class "klass". Ignore hits on {class: property}.
112-
113-
"dre" is a helper, not an argument.
114-
'''
115-
# do the index lookup
116-
hits = self.find(search_terms, klass)
117-
if not hits:
118-
return {}
92+
return False
11993

120-
designator_propname = {}
121-
for nm, propclass in klass.getprops().items():
122-
if (isinstance(propclass, hyperdb.Link)
123-
or isinstance(propclass, hyperdb.Multilink)):
124-
designator_propname[propclass.classname] = nm
125-
126-
# build a dictionary of nodes and their associated messages
127-
# and files
128-
nodeids = {} # this is the answer
129-
propspec = {} # used to do the klass.find
130-
for propname in designator_propname.values():
131-
propspec[propname] = {} # used as a set (value doesn't matter)
132-
133-
for classname, nodeid in hits:
134-
# if it's a property on klass, it's easy
135-
if classname == klass.classname:
136-
if not nodeids.has_key(nodeid):
137-
nodeids[nodeid] = {}
138-
continue
139-
140-
# make sure the class is a linked one, otherwise ignore
141-
if not designator_propname.has_key(classname):
142-
continue
143-
144-
# it's a linked class - set up to do the klass.find
145-
linkprop = designator_propname[classname] # eg, msg -> messages
146-
propspec[linkprop][nodeid] = 1
147-
148-
# retain only the meaningful entries
149-
for propname, idset in propspec.items():
150-
if not idset:
151-
del propspec[propname]
152-
153-
# klass.find tells me the klass nodeids the linked nodes relate to
154-
for resid in klass.find(**propspec):
155-
resid = str(resid)
156-
if not nodeids.has_key(id):
157-
nodeids[resid] = {}
158-
node_dict = nodeids[resid]
159-
# now figure out where it came from
160-
for linkprop in propspec.keys():
161-
for nodeid in klass.get(resid, linkprop):
162-
if propspec[linkprop].has_key(nodeid):
163-
# OK, this node[propname] has a winner
164-
if not node_dict.has_key(linkprop):
165-
node_dict[linkprop] = [nodeid]
166-
else:
167-
node_dict[linkprop].append(nodeid)
168-
return nodeids
94+
def getHits(self, search_terms, klass):
95+
return self.find(search_terms, klass)
16996

17097
def find(self, search_terms, klass):
17198
if not search_terms:
@@ -178,7 +105,7 @@ def find(self, search_terms, klass):
178105
if _isLink(propclass):
179106
nodeids.extend(self.tsearchQuery(propclass.classname, search_terms))
180107

181-
return nodeids
108+
return dict(enumerate(nodeids))
182109

183110
def tsearchQuery(self, classname, search_terms):
184111
query = """SELECT id FROM _%(classname)s
@@ -196,8 +123,25 @@ def tsearchQuery(self, classname, search_terms):
196123
if 'type' in klass.getprops():
197124
nodeids = [nodeid for nodeid in nodeids
198125
if klass.get(nodeid, 'type') == 'text/plain']
199-
200-
return [(classname, nodeid) for nodeid in nodeids]
126+
127+
# XXX: We haven't implemented property-level search, so I'm just faking
128+
# it here with a property named 'XXX'. We still need to fix the other
129+
# backends and indexer_common.Indexer.search to only want to unpack two
130+
# values.
131+
return [(classname, nodeid, 'XXX') for nodeid in nodeids]
132+
133+
# These only exist to satisfy the interface that's expected from indexers.
134+
def force_reindex(self):
135+
pass
136+
137+
def save_index(self):
138+
pass
139+
140+
def add_text(self, identifier, text, mime_type=None):
141+
pass
142+
143+
def close(self):
144+
pass
201145

202146
class FileClass(hyperdb.FileClass, Class):
203147
'''This class defines a large chunk of data. To support this, it has a

roundup/backends/indexer_common.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import re
2+
3+
from roundup import hyperdb
4+
5+
def _isLink(propclass):
    '''Return true if "propclass" is a hyperdb Link or Multilink property.'''
    link_types = (hyperdb.Link, hyperdb.Multilink)
    return isinstance(propclass, link_types)
8+
9+
class Indexer:
    '''Search logic shared by the concrete indexer backends.

    This class does not define "find" itself; it calls "self.find", so a
    subclass has to provide it (and may override "getHits" as well).
    '''

    def getHits(self, search_terms, klass):
        '''Return the raw index hits for "search_terms".

        This default implementation does not use "klass": it only calls
        "self.find(search_terms)". A subclass whose lookup depends on the
        class being searched can override this method.
        '''
        return self.find(search_terms)

    def search(self, search_terms, klass, ignore={}):
        '''Return the nodes of hyperdb Class "klass" matching the search.

        "search_terms" is passed through to "getHits", which must return a
        mapping whose values are (classname, nodeid, property) tuples.

        "ignore" is a mapping keyed by (classname, property) tuples; hits
        whose class/property pair is a key of "ignore" are skipped. It is
        only read here, never modified.

        The result is a dictionary {nodeid: {linkprop: [linked nodeid, ...]}}.
        A node that was hit directly (and not through a linked class) maps
        to an empty dictionary. An empty dictionary is returned when the
        index lookup yields no hits.
        '''
        # do the index lookup
        hits = self.getHits(search_terms, klass)
        if not hits:
            return {}

        # map the name of each linked class to the klass property that
        # links to it, eg. msg -> messages
        designator_propname = {}
        for nm, propclass in klass.getprops().items():
            if _isLink(propclass):
                designator_propname[propclass.classname] = nm

        # build a dictionary of nodes and their associated messages
        # and files
        nodeids = {}    # this is the answer
        propspec = {}   # used to do the klass.find
        for linkprop in designator_propname.values():
            propspec[linkprop] = {}   # used as a set (value doesn't matter)

        for classname, nodeid, hit_prop in hits.values():
            # skip this result if we don't care about this class/property
            if (classname, hit_prop) in ignore:
                continue

            # if it's a property on klass, it's easy
            if classname == klass.classname:
                nodeids.setdefault(nodeid, {})
                continue

            # make sure the class is a linked one, otherwise ignore
            if classname not in designator_propname:
                continue

            # it's a linked class - set up to do the klass.find
            propspec[designator_propname[classname]][nodeid] = 1

        # retain only the meaningful entries; walk a copy of the keys so
        # that deleting entries cannot disturb the iteration
        for linkprop in list(propspec.keys()):
            if not propspec[linkprop]:
                del propspec[linkprop]

        # klass.find tells me the klass nodeids the linked nodes relate to
        for resid in klass.find(**propspec):
            resid = str(resid)
            # Look the entry up by resid. The previous code tested the
            # builtin "id" here, which is never a key, so the entry was
            # unconditionally replaced.
            node_dict = nodeids.setdefault(resid, {})
            # now figure out where it came from
            for linkprop, wanted in propspec.items():
                matched = [nodeid for nodeid in klass.get(resid, linkprop)
                           if nodeid in wanted]
                if matched:
                    # assigning (not appending) keeps the result stable
                    # even if klass.find reports the same resid twice
                    node_dict[linkprop] = matched
        return nodeids

roundup/backends/indexer_dbm.py

Lines changed: 3 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# that promote freedom, but obviously am giving up any rights
1515
# to compel such.
1616
#
17-
#$Id: indexer_dbm.py,v 1.2 2004-11-05 05:10:07 richard Exp $
17+
#$Id: indexer_dbm.py,v 1.3 2005-01-04 16:48:46 jlgijsbers Exp $
1818
'''This module provides an indexer class, RoundupIndexer, that stores text
1919
indices in a roundup instance. This class makes searching the content of
2020
messages, string properties and text files possible.
@@ -23,6 +23,7 @@
2323

2424
import os, shutil, re, mimetypes, marshal, zlib, errno
2525
from roundup.hyperdb import Link, Multilink
26+
from roundup.backends.indexer_common import Indexer
2627

2728
stopwords = [
2829
"A", "AND", "ARE", "AS", "AT", "BE", "BUT", "BY",
@@ -36,7 +37,7 @@
3637
is_stopword[word] = None
3738
is_stopword = is_stopword.has_key
3839

39-
class Indexer:
40+
class Indexer(Indexer):
4041
'''Indexes information from roundup's hyperdb to allow efficient
4142
searching.
4243
@@ -151,70 +152,6 @@ def text_splitter(self, text):
151152
# place
152153
return re.findall(r'\b\w{2,25}\b', text)
153154

154-
def search(self, search_terms, klass, ignore={},
155-
dre=re.compile(r'([^\d]+)(\d+)')):
156-
'''Display search results looking for [search, terms] associated
157-
with the hyperdb Class "klass". Ignore hits on {class: property}.
158-
159-
"dre" is a helper, not an argument.
160-
'''
161-
# do the index lookup
162-
hits = self.find(search_terms)
163-
if not hits:
164-
return {}
165-
166-
designator_propname = {}
167-
for nm, propclass in klass.getprops().items():
168-
if isinstance(propclass, Link) or isinstance(propclass, Multilink):
169-
designator_propname[propclass.classname] = nm
170-
171-
# build a dictionary of nodes and their associated messages
172-
# and files
173-
nodeids = {} # this is the answer
174-
propspec = {} # used to do the klass.find
175-
for propname in designator_propname.values():
176-
propspec[propname] = {} # used as a set (value doesn't matter)
177-
for classname, nodeid, property in hits.values():
178-
# skip this result if we don't care about this class/property
179-
if ignore.has_key((classname, property)):
180-
continue
181-
182-
# if it's a property on klass, it's easy
183-
if classname == klass.classname:
184-
if not nodeids.has_key(nodeid):
185-
nodeids[nodeid] = {}
186-
continue
187-
188-
# make sure the class is a linked one, otherwise ignore
189-
if not designator_propname.has_key(classname):
190-
continue
191-
192-
# it's a linked class - set up to do the klass.find
193-
linkprop = designator_propname[classname] # eg, msg -> messages
194-
propspec[linkprop][nodeid] = 1
195-
196-
# retain only the meaningful entries
197-
for propname, idset in propspec.items():
198-
if not idset:
199-
del propspec[propname]
200-
201-
# klass.find tells me the klass nodeids the linked nodes relate to
202-
for resid in klass.find(**propspec):
203-
resid = str(resid)
204-
if not nodeids.has_key(id):
205-
nodeids[resid] = {}
206-
node_dict = nodeids[resid]
207-
# now figure out where it came from
208-
for linkprop in propspec.keys():
209-
for nodeid in klass.get(resid, linkprop):
210-
if propspec[linkprop].has_key(nodeid):
211-
# OK, this node[propname] has a winner
212-
if not node_dict.has_key(linkprop):
213-
node_dict[linkprop] = [nodeid]
214-
else:
215-
node_dict[linkprop].append(nodeid)
216-
return nodeids
217-
218155
# we override this to ignore not 2 < word < 25 and also to fix a bug -
219156
# the (fail) case.
220157
def find(self, wordlist):

0 commit comments

Comments
 (0)