Skip to content

Commit bb6c4df

Browse files
author
Richard Jones
committed
Added reindex command to roundup-admin.
Fixed reindex on first access. Also fixed reindexing of entries that change.
1 parent e5b4bc3 commit bb6c4df

File tree

7 files changed

+153
-49
lines changed

7 files changed

+153
-49
lines changed

roundup/admin.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1717
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1818
#
19-
# $Id: admin.py,v 1.15 2002-06-17 23:14:44 richard Exp $
19+
# $Id: admin.py,v 1.16 2002-07-09 04:19:09 richard Exp $
2020

2121
import sys, os, getpass, getopt, re, UserDict, shlex, shutil
2222
try:
@@ -964,6 +964,17 @@ def do_pack(self, args):
964964
self.db.pack(pack_before)
965965
return 0
966966

967+
def do_reindex(self, args):
968+
'''Usage: reindex
969+
Re-generate an instance's search indexes.
970+
971+
This will re-generate the search indexes for an instance. This will
972+
typically happen automatically.
973+
'''
974+
self.db.indexer.force_reindex()
975+
self.db.reindex()
976+
return 0
977+
967978
def run_command(self, args):
968979
'''Run a single command
969980
'''
@@ -1114,6 +1125,9 @@ def main(self):
11141125

11151126
#
11161127
# $Log: not supported by cvs2svn $
1128+
# Revision 1.15 2002/06/17 23:14:44 richard
1129+
# . #569415 ] {version}
1130+
#
11171131
# Revision 1.14 2002/06/11 06:41:50 richard
11181132
# Removed prompt for admin email in initialisation.
11191133
#

roundup/backends/back_anydbm.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
#$Id: back_anydbm.py,v 1.39 2002-07-09 03:02:52 richard Exp $
18+
#$Id: back_anydbm.py,v 1.40 2002-07-09 04:19:09 richard Exp $
1919
'''
2020
This module defines a backend that saves the hyperdatabase in a database
2121
chosen by anydbm. It is guaranteed to always be available in python
@@ -69,8 +69,10 @@ def __init__(self, config, journaltag=None):
6969
def post_init(self):
7070
"""Called once the schema initialisation has finished."""
7171
# reindex the db if necessary
72-
if not self.indexer.should_reindex():
73-
return
72+
if self.indexer.should_reindex():
73+
self.reindex()
74+
75+
def reindex(self):
7476
for klass in self.classes.values():
7577
for nodeid in klass.list():
7678
klass.index(nodeid)
@@ -507,6 +509,22 @@ def rollback(self):
507509

508510
#
509511
#$Log: not supported by cvs2svn $
512+
#Revision 1.39 2002/07/09 03:02:52 richard
513+
#More indexer work:
514+
#- all String properties may now be indexed too. Currently there's a bit of
515+
# "issue" specific code in the actual searching which needs to be
516+
# addressed. In a nutshell:
517+
# + pass 'indexme="yes"' as a String() property initialisation arg, eg:
518+
# file = FileClass(db, "file", name=String(), type=String(),
519+
# comment=String(indexme="yes"))
520+
# + the comment will then be indexed and be searchable, with the results
521+
# related back to the issue that the file is linked to
522+
#- as a result of this work, the FileClass has a default MIME type that may
523+
# be overridden in a subclass, or by the use of a "type" property as is
524+
# done in the default templates.
525+
#- the regeneration of the indexes (if necessary) is done once the schema is
526+
# set up in the dbinit.
527+
#
510528
#Revision 1.38 2002/07/08 06:58:15 richard
511529
#cleaned up the indexer code:
512530
# - it splits more words out (much simpler, faster splitter)

roundup/cgi_client.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
# $Id: cgi_client.py,v 1.133 2002-07-08 15:32:05 gmcm Exp $
18+
# $Id: cgi_client.py,v 1.134 2002-07-09 04:19:09 richard Exp $
1919

2020
__doc__ = """
2121
WWW request handler (also used in the stand-alone server).
@@ -26,7 +26,6 @@
2626

2727
import roundupdb, htmltemplate, date, hyperdb, password
2828
from roundup.i18n import _
29-
from roundup.indexer import Indexer
3029

3130
class Unauthorised(ValueError):
3231
pass
@@ -73,10 +72,6 @@ def __init__(self, instance, request, env, form=None):
7372
# someone gave us a non-int debug level, turn it off
7473
self.debug = 0
7574

76-
# used for searching the indexes
77-
self.indexer = Indexer('%s/db'%instance.INSTANCE_HOME)
78-
79-
8075
def getuid(self):
8176
try:
8277
return self.db.user.lookup(self.user)
@@ -1459,6 +1454,10 @@ def parsePropsFromForm(db, cl, form, nodeid=0, num_re=re.compile('^\d+$')):
14591454

14601455
#
14611456
# $Log: not supported by cvs2svn $
1457+
# Revision 1.133 2002/07/08 15:32:05 gmcm
1458+
# Pagination of index pages.
1459+
# New search form.
1460+
#
14621461
# Revision 1.132 2002/07/08 07:26:14 richard
14631462
# ehem
14641463
#

roundup/htmltemplate.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
# $Id: htmltemplate.py,v 1.95 2002-07-08 15:32:06 gmcm Exp $
18+
# $Id: htmltemplate.py,v 1.96 2002-07-09 04:19:09 richard Exp $
1919

2020
__doc__ = """
2121
Template engine.
@@ -874,7 +874,7 @@ def render(self, filterspec={}, search_text='', filter=[], columns=[],
874874
matches = None
875875
if nodeids is None:
876876
if search_text != '':
877-
matches = self.client.indexer.search(
877+
matches = self.db.indexer.search(
878878
search_text.split(' '), self.cl)
879879
nodeids = self.cl.filter(matches, filterspec, sort, group)
880880
for nodeid in nodeids[startwith:startwith+pagesize]:
@@ -1237,6 +1237,10 @@ def render(self, form):
12371237

12381238
#
12391239
# $Log: not supported by cvs2svn $
1240+
# Revision 1.95 2002/07/08 15:32:06 gmcm
1241+
# Pagination of index pages.
1242+
# New search form.
1243+
#
12401244
# Revision 1.94 2002/06/27 15:38:53 gmcm
12411245
# Fix the cycles (a clear method, called after render, that removes
12421246
# the bound methods from the globals dict).

roundup/indexer.py

Lines changed: 48 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# that promote freedom, but obviously am giving up any rights
1515
# to compel such.
1616
#
17-
#$Id: indexer.py,v 1.4 2002-07-09 03:02:52 richard Exp $
17+
#$Id: indexer.py,v 1.5 2002-07-09 04:19:09 richard Exp $
1818
'''
1919
This module provides an indexer class, RoundupIndexer, that stores text
2020
indices in a roundup instance. This class makes searching the content of
@@ -25,29 +25,35 @@
2525
class Indexer:
2626
''' Indexes information from roundup's hyperdb to allow efficient
2727
searching.
28+
29+
Three structures are created by the indexer:
30+
files {identifier: (fileid, wordcount)}
31+
words {word: {fileid: count}}
32+
fileids {fileid: identifier}
2833
'''
2934
def __init__(self, db_path):
30-
indexdb_path = os.path.join(db_path, 'indexes')
31-
self.indexdb = os.path.join(indexdb_path, 'index.db')
35+
self.indexdb_path = os.path.join(db_path, 'indexes')
36+
self.indexdb = os.path.join(self.indexdb_path, 'index.db')
3237
self.reindex = 0
3338
self.casesensitive = 0
3439
self.quiet = 9
3540

3641
# see if we need to reindex because of a change in code
37-
if (not os.path.exists(indexdb_path) or
38-
not os.path.exists(os.path.join(indexdb_path, 'version'))):
42+
if (not os.path.exists(self.indexdb_path) or
43+
not os.path.exists(os.path.join(self.indexdb_path, 'version'))):
3944
# TODO: if the version file exists (in the future) we'll want to
4045
# check the value in it - for now the file itself is a flag
41-
if os.path.exists(indexdb_path):
42-
shutil.rmtree(indexdb_path)
43-
os.makedirs(indexdb_path)
44-
os.chmod(indexdb_path, 0775)
45-
open(os.path.join(indexdb_path, 'version'), 'w').write('1\n')
46-
47-
# we need to reindex
48-
self.reindex = 1
49-
else:
50-
self.reindex = 0
46+
self.force_reindex()
47+
48+
def force_reindex(self):
49+
'''Force a reindex condition
50+
'''
51+
if os.path.exists(self.indexdb_path):
52+
shutil.rmtree(self.indexdb_path)
53+
os.makedirs(self.indexdb_path)
54+
os.chmod(self.indexdb_path, 0775)
55+
open(os.path.join(self.indexdb_path, 'version'), 'w').write('1\n')
56+
self.reindex = 1
5157

5258
def should_reindex(self):
5359
'''Should we reindex?
@@ -61,16 +67,9 @@ def add_text(self, identifier, text, mime_type='text/plain'):
6167
# make sure the index is loaded
6268
self.load_index()
6369

64-
# Is file eligible for (re)indexing?
70+
# remove old entries for this identifier
6571
if self.files.has_key(identifier):
66-
# Reindexing enabled, cleanup dicts
67-
if self.reindex:
68-
self.purge_entry(identifier, self.files, self.words)
69-
else:
70-
# DO NOT reindex this file
71-
if self.quiet < 5:
72-
print "Not reindexing", identifier
73-
return 0
72+
self.purge_entry(identifier)
7473

7574
# split into words
7675
words = self.splitter(text, mime_type)
@@ -281,26 +280,43 @@ def save_index(self):
281280
pickle_fh.write(zlib.compress(pickle_str))
282281
os.chmod(filename, 0664)
283282

284-
def purge_entry(self, fname, file_dct, word_dct):
283+
def purge_entry(self, identifier):
285284
''' Remove a file from file index and word index
286285
'''
287-
try: # The easy part, cleanup the file index
288-
file_index = file_dct[fname]
289-
del file_dct[fname]
290-
except KeyError:
291-
pass # We'll assume we only encounter KeyError's
286+
if not self.files.has_key(identifier):
287+
return
288+
289+
file_index = self.files[identifier][0]
290+
del self.files[identifier]
291+
del self.fileids[file_index]
292+
292293
# The much harder part, cleanup the word index
293-
for word, occurs in word_dct.items():
294+
for key, occurs in self.words.items():
294295
if occurs.has_key(file_index):
295296
del occurs[file_index]
296-
word_dct[word] = occurs
297297

298298
def index_loaded(self):
299299
return (hasattr(self,'fileids') and hasattr(self,'files') and
300300
hasattr(self,'words'))
301301

302302
#
303303
#$Log: not supported by cvs2svn $
304+
#Revision 1.4 2002/07/09 03:02:52 richard
305+
#More indexer work:
306+
#- all String properties may now be indexed too. Currently there's a bit of
307+
# "issue" specific code in the actual searching which needs to be
308+
# addressed. In a nutshell:
309+
# + pass 'indexme="yes"' as a String() property initialisation arg, eg:
310+
# file = FileClass(db, "file", name=String(), type=String(),
311+
# comment=String(indexme="yes"))
312+
# + the comment will then be indexed and be searchable, with the results
313+
# related back to the issue that the file is linked to
314+
#- as a result of this work, the FileClass has a default MIME type that may
315+
# be overridden in a subclass, or by the use of a "type" property as is
316+
# done in the default templates.
317+
#- the regeneration of the indexes (if necessary) is done once the schema is
318+
# set up in the dbinit.
319+
#
304320
#Revision 1.3 2002/07/08 06:58:15 richard
305321
#cleaned up the indexer code:
306322
# - it splits more words out (much simpler, faster splitter)

roundup/roundupdb.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
# $Id: roundupdb.py,v 1.60 2002-07-09 03:02:52 richard Exp $
18+
# $Id: roundupdb.py,v 1.61 2002-07-09 04:19:09 richard Exp $
1919

2020
__doc__ = """
2121
Extending hyperdb with types specific to issue-tracking.
@@ -313,7 +313,7 @@ def __init__(self, db, classname, **properties):
313313
dictionary attempts to specify any of these properties or a
314314
"creation" or "activity" property, a ValueError is raised."""
315315
if not properties.has_key('title'):
316-
properties['title'] = hyperdb.String()
316+
properties['title'] = hyperdb.String(indexme='yes')
317317
if not properties.has_key('messages'):
318318
properties['messages'] = hyperdb.Multilink("msg")
319319
if not properties.has_key('files'):
@@ -691,6 +691,22 @@ def generateChangeNote(self, nodeid, oldvalues):
691691

692692
#
693693
# $Log: not supported by cvs2svn $
694+
# Revision 1.60 2002/07/09 03:02:52 richard
695+
# More indexer work:
696+
# - all String properties may now be indexed too. Currently there's a bit of
697+
# "issue" specific code in the actual searching which needs to be
698+
# addressed. In a nutshell:
699+
# + pass 'indexme="yes"' as a String() property initialisation arg, eg:
700+
# file = FileClass(db, "file", name=String(), type=String(),
701+
# comment=String(indexme="yes"))
702+
# + the comment will then be indexed and be searchable, with the results
703+
# related back to the issue that the file is linked to
704+
# - as a result of this work, the FileClass has a default MIME type that may
705+
# be overridden in a subclass, or by the use of a "type" property as is
706+
# done in the default templates.
707+
# - the regeneration of the indexes (if necessary) is done once the schema is
708+
# set up in the dbinit.
709+
#
694710
# Revision 1.59 2002/06/18 03:55:25 dman13
695711
# Fixed name/address display problem introduced by an earlier change.
696712
# (instead of "name<addr>" display "name <addr>")

test/test_db.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
# $Id: test_db.py,v 1.24 2002-07-09 03:02:53 richard Exp $
18+
# $Id: test_db.py,v 1.25 2002-07-09 04:19:09 richard Exp $
1919

2020
import unittest, os, shutil
2121

@@ -314,8 +314,29 @@ def testSearching(self):
314314
{'2': {}})
315315
self.assertEquals(self.db.indexer.search(['flebble'], self.db.issue),
316316
{'2': {}, '1': {}})
317-
self.assertEquals(self.db.indexer.search(['blah'], self.db.issue),
318-
{'1': {'files': ['2']}})
317+
318+
def testReindexing(self):
319+
self.db.issue.create(title="frooz")
320+
self.db.commit()
321+
self.assertEquals(self.db.indexer.search(['frooz'], self.db.issue),
322+
{'1': {}})
323+
self.db.issue.set('1', title="dooble")
324+
self.db.commit()
325+
self.assertEquals(self.db.indexer.search(['dooble'], self.db.issue),
326+
{'1': {}})
327+
self.assertEquals(self.db.indexer.search(['frooz'], self.db.issue), {})
328+
329+
def testForcedReindexing(self):
330+
self.db.issue.create(title="flebble frooz")
331+
self.db.commit()
332+
self.assertEquals(self.db.indexer.search(['flebble'], self.db.issue),
333+
{'1': {}})
334+
self.db.indexer.quiet = 1
335+
self.db.indexer.force_reindex()
336+
self.db.post_init()
337+
self.db.indexer.quiet = 9
338+
self.assertEquals(self.db.indexer.search(['flebble'], self.db.issue),
339+
{'1': {}})
319340

320341
class anydbmReadOnlyDBTestCase(MyTestCase):
321342
def setUp(self):
@@ -419,6 +440,22 @@ def suite():
419440

420441
#
421442
# $Log: not supported by cvs2svn $
443+
# Revision 1.24 2002/07/09 03:02:53 richard
444+
# More indexer work:
445+
# - all String properties may now be indexed too. Currently there's a bit of
446+
# "issue" specific code in the actual searching which needs to be
447+
# addressed. In a nutshell:
448+
# + pass 'indexme="yes"' as a String() property initialisation arg, eg:
449+
# file = FileClass(db, "file", name=String(), type=String(),
450+
# comment=String(indexme="yes"))
451+
# + the comment will then be indexed and be searchable, with the results
452+
# related back to the issue that the file is linked to
453+
# - as a result of this work, the FileClass has a default MIME type that may
454+
# be overridden in a subclass, or by the use of a "type" property as is
455+
# done in the default templates.
456+
# - the regeneration of the indexes (if necessary) is done once the schema is
457+
# set up in the dbinit.
458+
#
422459
# Revision 1.23 2002/06/20 23:51:48 richard
423460
# Cleaned up the hyperdb tests
424461
#

0 commit comments

Comments
 (0)