11''' This implements the full-text indexer using Whoosh.
22'''
3- import re , os
3+ import os
44
55from whoosh import fields , qparser , index , query , analysis
66
77from roundup .backends .indexer_common import Indexer as IndexerBase
88from roundup .anypy .strings import us2u
99
10+
1011class Indexer (IndexerBase ):
1112 def __init__ (self , db ):
1213 IndexerBase .__init__ (self , db )
@@ -23,13 +24,13 @@ def _get_index(self):
2324 # StandardAnalyzer lowercases all words; it is configured to
2425 # block stopwords and words with lengths not between
2526 # self.minlength and self.maxlength from indexer_common
26- stopfilter = analysis .StandardAnalyzer ( #stoplist=self.stopwords,
27+ stopfilter = analysis .StandardAnalyzer ( #stoplist=self.stopwords,
2728 minsize = self .minlength ,
2829 maxsize = self .maxlength )
2930 os .mkdir (path )
3031 schema = fields .Schema (identifier = fields .ID (stored = True ,
3132 unique = True ),
32- content = fields .TEXT (analyzer = stopfilter ))
33+ content = fields .TEXT (analyzer = stopfilter ))
3334 index .create_in (path , schema )
3435 self .index = index .open_dir (path )
3536 return self .index
@@ -84,7 +85,7 @@ def add_text(self, identifier, text, mime_type='text/plain'):
8485 # We use the identifier twice: once in the actual "text" being
8586 # indexed so we can search on it, and again as the "data" being
8687 # indexed so we know what we're matching when we get results
87- identifier = u"%s:%s:%s" % identifier
88+ identifier = u"%s:%s:%s" % identifier
8889
8990 # FIXME need to enhance this to handle the whoosh.store.LockError
9091 # that may be raised if there is already another process with a lock.
@@ -111,19 +112,18 @@ def find(self, wordlist):
111112 * more rules here
112113 '''
113114
114- wordlist = [ word for word in wordlist
115- if (self .minlength <= len (word ) <= self .maxlength ) and
116- not self .is_stopword (word .upper ()) ]
115+ wordlist = [word for word in wordlist
116+ if (self .minlength <= len (word ) <= self .maxlength ) and
117+ not self .is_stopword (word .upper ())]
117118
118119 if not wordlist :
119120 return {}
120121
121122 searcher = self ._get_searcher ()
122- q = query .And ([ query .FuzzyTerm ("content" , word .lower ())
123- for word in wordlist ])
123+ q = query .And ([query .FuzzyTerm ("content" , word .lower ())
124+ for word in wordlist ])
124125
125126 results = searcher .search (q , limit = None )
126127
127128 return [tuple (result ["identifier" ].split (':' ))
128129 for result in results ]
129-
0 commit comments