1414# that promote freedom, but obviously am giving up any rights
1515# to compel such.
1616#
17- #$Id: indexer.py,v 1.6 2002-07-09 04:26:44 richard Exp $
17+ #$Id: indexer.py,v 1.7 2002-07-09 21:38:43 richard Exp $
1818'''
1919This module provides an indexer class, RoundupIndexer, that stores text
2020indices in a roundup instance. This class makes searching the content of
@@ -35,8 +35,8 @@ def __init__(self, db_path):
3535 self .indexdb_path = os .path .join (db_path , 'indexes' )
3636 self .indexdb = os .path .join (self .indexdb_path , 'index.db' )
3737 self .reindex = 0
38- self .casesensitive = 0
3938 self .quiet = 9
39+ self .changed = 0
4040
4141 # see if we need to reindex because of a change in code
4242 if (not os .path .exists (self .indexdb_path ) or
@@ -54,6 +54,7 @@ def force_reindex(self):
5454 os .chmod (self .indexdb_path , 0775 )
5555 open (os .path .join (self .indexdb_path , 'version' ), 'w' ).write ('1\n ' )
5656 self .reindex = 1
57+ self .changed = 1
5758
5859 def should_reindex (self ):
5960 '''Should we reindex?
@@ -102,21 +103,23 @@ def add_text(self, identifier, text, mime_type='text/plain'):
102103 # make a reference to the file for this word
103104 entry [file_index ] = filedict [word ]
104105
106+ # save needed
107+ self .changed = 1
108+
105109 def splitter (self , text , ftype ):
106110 ''' Split the contents of a text string into a list of 'words'
107111 '''
108112 if ftype == 'text/plain' :
109- words = self .text_splitter (text , self . casesensitive )
113+ words = self .text_splitter (text )
110114 else :
111115 return []
112116 return words
113117
114- def text_splitter (self , text , casesensitive = 0 ):
118+ def text_splitter (self , text ):
115119 """Split text/plain string into a list of words
116120 """
117- # Let's adjust case if not case-sensitive
118- if not casesensitive :
119- text = text .upper ()
121+ # case insensitive
122+ text = text .upper ()
120123
121124 # Split the raw text, losing anything longer than 25 characters
122125 # since that'll be gibberish (encoded text or somesuch) or shorter
@@ -183,8 +186,7 @@ def find(self, wordlist):
183186 if not 2 < len (word ) < 25 :
184187 # word outside the bounds of what we index - ignore
185188 continue
186- if not self .casesensitive :
187- word = word .upper ()
189+ word = word .upper ()
188190 entry = self .words .get (word ) # For each word, get index
189191 entries [word ] = entry # of matching files
190192 if not entry : # Nothing for this one word (fail)
@@ -244,10 +246,12 @@ def load_index(self, reload=0, wordlist=None):
244246 self .words = db ['WORDS' ]
245247 self .files = db ['FILES' ]
246248 self .fileids = db ['FILEIDS' ]
249+ self .changed = 0
247250
248251 def save_index (self ):
249- # make sure we're loaded
250- self .load_index ()
252+ # only save if the index is loaded and changed
253+ if not self .index_loaded () or not self .changed :
254+ return
251255
252256 # brutal space saver... delete all the small segments
253257 for segment in self .segments :
@@ -280,6 +284,9 @@ def save_index(self):
280284 pickle_fh .write (zlib .compress (pickle_str ))
281285 os .chmod (filename , 0664 )
282286
287+ # save done
288+ self .changed = 0
289+
283290 def purge_entry (self , identifier ):
284291 ''' Remove a file from file index and word index
285292 '''
@@ -295,12 +302,18 @@ def purge_entry(self, identifier):
295302 if occurs .has_key (file_index ):
296303 del occurs [file_index ]
297304
305+ # save needed
306+ self .changed = 1
307+
298308 def index_loaded (self ):
299309 return (hasattr (self ,'fileids' ) and hasattr (self ,'files' ) and
300310 hasattr (self ,'words' ))
301311
302312#
303313#$Log: not supported by cvs2svn $
314+ #Revision 1.6 2002/07/09 04:26:44 richard
315+ #We're indexing numbers now, and _underscore words
316+ #
304317#Revision 1.5 2002/07/09 04:19:09 richard
305318#Added reindex command to roundup-admin.
306319#Fixed reindex on first access.
0 commit comments