Skip to content

Commit 0d140a4

Browse files
author
Richard Jones
committed
improvements
1 parent 25d862b commit 0d140a4

File tree

1 file changed

+9
-16
lines changed

1 file changed

+9
-16
lines changed

roundup/backends/indexer_rdbms.py

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
#$Id: indexer_rdbms.py,v 1.12 2006-02-06 21:00:47 richard Exp $
1+
#$Id: indexer_rdbms.py,v 1.13 2006-04-27 06:33:18 richard Exp $
22
''' This implements the full-text indexer over two RDBMS tables. The first
33
is a mapping of words to occurrence IDs. The second maps the IDs to (Class,
44
propname, itemid) instances.
55
'''
6-
import re
6+
import re, sets
77

88
from roundup.backends.indexer_common import Indexer as IndexerBase
99

@@ -45,38 +45,31 @@ def add_text(self, identifier, text, mime_type='text/plain'):
4545
self.db.cursor.execute(sql, identifier)
4646
r = self.db.cursor.fetchone()
4747
if not r:
48+
# not previously indexed
4849
id = self.db.newid('__textids')
4950
sql = 'insert into __textids (_textid, _class, _itemid, _prop)'\
5051
' values (%s, %s, %s, %s)'%(a, a, a, a)
5152
self.db.cursor.execute(sql, (id, ) + identifier)
52-
self.db.cursor.execute('select max(_textid) from __textids')
53-
id = self.db.cursor.fetchone()[0]
5453
else:
5554
id = int(r[0])
5655
# clear out any existing indexed values
5756
sql = 'delete from __words where _textid=%s'%a
5857
self.db.cursor.execute(sql, (id, ))
5958

60-
# ok, find all the words in the text
59+
# ok, find all the unique words in the text
6160
text = unicode(text, "utf-8", "replace").upper()
6261
wordlist = [w.encode("utf-8", "replace")
6362
for w in re.findall(r'(?u)\b\w{2,25}\b', text)]
64-
words = {}
63+
words = sets.Set()
6564
for word in wordlist:
6665
if self.is_stopword(word): continue
6766
if len(word) > 25: continue
68-
words[word] = 1
69-
words = words.keys()
67+
words.add(word)
7068

7169
# for each word, add an entry in the db
72-
for word in words:
73-
# don't dupe
74-
sql = 'select * from __words where _word=%s and _textid=%s'%(a, a)
75-
self.db.cursor.execute(sql, (word, id))
76-
if self.db.cursor.fetchall():
77-
continue
78-
sql = 'insert into __words (_word, _textid) values (%s, %s)'%(a, a)
79-
self.db.cursor.execute(sql, (word, id))
70+
sql = 'insert into __words (_word, _textid) values (%s, %s)'%(a, a)
71+
words = [(word, id) for word in words]
72+
self.db.cursor.execute(sql, words)
8073

8174
def find(self, wordlist):
8275
'''look up all the words in the wordlist.

0 commit comments

Comments
 (0)