77from roundup .backends .indexer_common import Indexer as IndexerBase
88from roundup .anypy .strings import us2u , u2s
99
10+
1011class Indexer (IndexerBase ):
1112 def __init__ (self , db ):
1213 IndexerBase .__init__ (self , db )
@@ -46,34 +47,36 @@ def add_text(self, identifier, text, mime_type='text/plain'):
4647 # first, find the id of the (classname, itemid, property)
4748 a = self .db .arg
4849 sql = 'select _textid from __textids where _class=%s and ' \
49- '_itemid=%s and _prop=%s' % (a , a , a )
50+ '_itemid=%s and _prop=%s' % (a , a , a )
5051 self .db .cursor .execute (sql , identifier )
5152 r = self .db .cursor .fetchone ()
5253 if not r :
5354 # not previously indexed
5455 id = self .db .newid ('__textids' )
5556 sql = 'insert into __textids (_textid, _class, _itemid, _prop)' \
56- ' values (%s, %s, %s, %s)' % (a , a , a , a )
57+ ' values (%s, %s, %s, %s)' % (a , a , a , a )
5758 self .db .cursor .execute (sql , (id , ) + identifier )
5859 else :
5960 id = int (r [0 ])
6061 # clear out any existing indexed values
61- sql = 'delete from __words where _textid=%s' % a
62+ sql = 'delete from __words where _textid=%s' % a
6263 self .db .cursor .execute (sql , (id , ))
6364
6465 # ok, find all the unique words in the text
6566 text = us2u (text , "replace" )
6667 text = text .upper ()
6768 wordlist = [u2s (w )
6869 for w in re .findall (r'(?u)\b\w{%d,%d}\b'
69- % (self .minlength , self .maxlength ), text )]
70+ % (self .minlength , self .maxlength ),
71+ text )]
7072 words = set ()
7173 for word in wordlist :
72- if self .is_stopword (word ): continue
74+ if self .is_stopword (word ):
75+ continue
7376 words .add (word )
7477
7578 # for each word, add an entry in the db
76- sql = 'insert into __words (_word, _textid) values (%s, %s)' % (a , a )
79+ sql = 'insert into __words (_word, _textid) values (%s, %s)' % (a , a )
7780 words = [(word , id ) for word in words ]
7881 self .db .cursor .executemany (sql , words )
7982
@@ -85,28 +88,30 @@ def find(self, wordlist):
8588 if not wordlist :
8689 return []
8790
88- l = [word .upper () for word in wordlist
89- if self .minlength <= len (word ) <= self .maxlength ]
90- l = [word for word in l if not self .is_stopword (word )]
91+ cap_wl = [word .upper () for word in wordlist
92+ if self .minlength <= len (word ) <= self .maxlength ]
93+ clean_wl = [word for word in cap_wl if not self .is_stopword (word )]
9194
92- if not l :
95+ if not clean_wl :
9396 return []
9497
9598 if self .db .implements_intersect :
9699 # simple AND search
97- sql = 'select distinct(_textid) from __words where _word=%s' % self .db .arg
98- sql = '\n INTERSECT\n ' .join ([sql ]* len (l ))
99- self .db .cursor .execute (sql , tuple (l ))
100+ sql = 'select distinct(_textid) from __words where _word=%s' % (
101+ self .db .arg )
102+ sql = '\n INTERSECT\n ' .join ([sql ]* len (clean_wl ))
103+ self .db .cursor .execute (sql , tuple (clean_wl ))
100104 r = self .db .cursor .fetchall ()
101105 if not r :
102106 return []
103107 a = ',' .join ([self .db .arg ] * len (r ))
104108 sql = 'select _class, _itemid, _prop from __textids ' \
105- 'where _textid in (%s)' % a
109+ 'where _textid in (%s)' % a
106110 self .db .cursor .execute (sql , tuple ([int (row [0 ]) for row in r ]))
107111
108112 else :
109- # A more complex version for MySQL since it doesn't implement INTERSECT
113+ # A more complex version for MySQL since it doesn't
114+ # implement INTERSECT
110115
111116 # Construct SQL statement to join __words table to itself
112117 # multiple times.
@@ -119,22 +124,22 @@ def find(self, wordlist):
119124
120125 join_list = []
121126 match_list = []
122- for n in range (len (l ) - 1 ):
127+ for n in range (len (clean_wl ) - 1 ):
123128 join_list .append (join_tmpl % (n + 2 ))
124129 match_list .append (match_tmpl % (n + 2 , self .db .arg ))
125130
126- sql = sql % (' ' .join (join_list ), self .db .arg , ' ' .join (match_list ))
127- self .db .cursor .execute (sql , l )
131+ sql = sql % (' ' .join (join_list ), self .db .arg ,
132+ ' ' .join (match_list ))
133+ self .db .cursor .execute (sql , clean_wl )
128134
129135 r = [x [0 ] for x in self .db .cursor .fetchall ()]
130136 if not r :
131137 return []
132138
133139 a = ',' .join ([self .db .arg ] * len (r ))
134140 sql = 'select _class, _itemid, _prop from __textids ' \
135- 'where _textid in (%s)' % a
141+ 'where _textid in (%s)' % a
136142
137143 self .db .cursor .execute (sql , tuple (map (int , r )))
138144
139145 return self .db .cursor .fetchall ()
140-
0 commit comments