Skip to content

Commit e5b4bc3

Browse files
author
Richard Jones
committed
More indexer work:
- all String properties may now be indexed too. Currently there's a bit of "issue" specific code in the actual searching which needs to be addressed. In a nutshell:
  + pass 'indexme="yes"' as a String() property initialisation arg, eg: file = FileClass(db, "file", name=String(), type=String(), comment=String(indexme="yes"))
  + the comment will then be indexed and be searchable, with the results related back to the issue that the file is linked to
- as a result of this work, the FileClass has a default MIME type that may be overridden in a subclass, or by the use of a "type" property as is done in the default templates.
- the regeneration of the indexes (if necessary) is done once the schema is set up in the dbinit.
1 parent c60540b commit e5b4bc3

File tree

10 files changed

+271
-142
lines changed

10 files changed

+271
-142
lines changed

CHANGES.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,19 @@ Fixed:
1212
- only index text/plain and rfc822/message (ideas for other text formats to
1313
index are welcome)
1414
- added simple unit test for indexer. Needs more tests for regression.
15+
- all String properties may now be indexed too. Currently there's a bit of
16+
"issue" specific code in the actual searching which needs to be
17+
addressed. In a nutshell:
18+
+ pass 'indexme="yes"' as a String() property initialisation arg, eg:
19+
file = FileClass(db, "file", name=String(), type=String(),
20+
comment=String(indexme="yes"))
21+
+ the comment will then be indexed and be searchable, with the results
22+
related back to the issue that the file is linked to
23+
- as a result of this work, the FileClass has a default MIME type that may
24+
be overridden in a subclass, or by the use of a "type" property as is
25+
done in the default templates.
26+
- the regeneration of the indexes (if necessary) is done once the schema is
27+
set up in the dbinit.
1528
. made the unit tests run again - they were quite b0rken
1629

1730

doc/upgrading.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,17 @@ accordingly.
77

88
.. contents::
99

10+
Migrating from 0.4.x to 0.5.0
11+
=============================
12+
13+
0.5.0 Configuration
14+
-------------------
15+
16+
TODO: document the new indexing support (String properties created with indexme="yes" are indexed and searchable)
17+
TODO: mention that the dbinit must call db.post_init() once the schema is set
18+
up, so that the indexes are regenerated if necessary
19+
20+
1021
Migrating from 0.4.1 to 0.4.2
1122
=============================
1223

roundup/backends/back_anydbm.py

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
#$Id: back_anydbm.py,v 1.38 2002-07-08 06:58:15 richard Exp $
18+
#$Id: back_anydbm.py,v 1.39 2002-07-09 03:02:52 richard Exp $
1919
'''
2020
This module defines a backend that saves the hyperdatabase in a database
2121
chosen by anydbm. It is guaranteed to always be available in python
@@ -66,6 +66,16 @@ def __init__(self, config, journaltag=None):
6666
# ensure files are group readable and writable
6767
os.umask(0002)
6868

69+
def post_init(self):
70+
"""Called once the schema initialisation has finished."""
71+
# reindex the db if necessary
72+
if not self.indexer.should_reindex():
73+
return
74+
for klass in self.classes.values():
75+
for nodeid in klass.list():
76+
klass.index(nodeid)
77+
self.indexer.save_index()
78+
6979
def __repr__(self):
7080
return '<back_anydbm instance at %x>'%id(self)
7181

@@ -409,15 +419,24 @@ def commit(self):
409419
self.databases = {}
410420

411421
# now, do all the transactions
422+
reindex = {}
412423
for method, args in self.transactions:
413-
method(*args)
424+
reindex[method(*args)] = 1
414425

415426
# now close all the database files
416427
for db in self.databases.values():
417428
db.close()
418429
del self.databases
419430
# TODO: unlock the DB
420431

432+
# reindex the nodes that request it
433+
for classname, nodeid in filter(None, reindex.keys()):
434+
print >>hyperdb.DEBUG, 'commit.reindex', (classname, nodeid)
435+
self.getclass(classname).index(nodeid)
436+
437+
# save the indexer state
438+
self.indexer.save_index()
439+
421440
# all transactions committed, back to normal
422441
self.cache = {}
423442
self.dirtynodes = {}
@@ -439,6 +458,9 @@ def _doSaveNode(self, classname, nodeid, node):
439458
# now save the marshalled data
440459
db[nodeid] = marshal.dumps(self.serialise(classname, node))
441460

461+
# return the classname, nodeid so we reindex this content
462+
return (classname, nodeid)
463+
442464
def _doSaveJournal(self, classname, nodeid, action, params):
443465
# serialise first
444466
if action in ('set', 'create'):
@@ -477,15 +499,23 @@ def rollback(self):
477499
for method, args in self.transactions:
478500
# delete temporary files
479501
if method == self._doStoreFile:
480-
if os.path.exists(args[0]+".tmp"):
481-
os.remove(args[0]+".tmp")
502+
self._rollbackStoreFile(*args)
482503
self.cache = {}
483504
self.dirtynodes = {}
484505
self.newnodes = {}
485506
self.transactions = []
486507

487508
#
488509
#$Log: not supported by cvs2svn $
510+
#Revision 1.38 2002/07/08 06:58:15 richard
511+
#cleaned up the indexer code:
512+
# - it splits more words out (much simpler, faster splitter)
513+
# - removed code we'll never use (roundup.roundup_indexer has the full
514+
# implementation, and replaces roundup.indexer)
515+
# - only index text/plain and rfc822/message (ideas for other text formats to
516+
# index are welcome)
517+
# - added simple unit test for indexer. Needs more tests for regression.
518+
#
489519
#Revision 1.37 2002/06/20 23:52:35 richard
490520
#More informative error message
491521
#

roundup/backends/blobfiles.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
#$Id: blobfiles.py,v 1.5 2002-07-08 06:58:15 richard Exp $
18+
#$Id: blobfiles.py,v 1.6 2002-07-09 03:02:52 richard Exp $
1919
'''
2020
This module exports file storage for roundup backends.
2121
Files are stored into a directory hierarchy.
@@ -74,12 +74,19 @@ def storefile(self, classname, nodeid, property, content):
7474
None, in which case the filename does not indicate which property
7575
is being saved.
7676
'''
77+
# determine the name of the file to write to
7778
name = self.filename(classname, nodeid, property)
79+
80+
# make sure the file storage dir exists
7881
if not os.path.exists(os.path.dirname(name)):
7982
os.makedirs(os.path.dirname(name))
83+
84+
# open the temp file for writing
8085
open(name + '.tmp', 'wb').write(content)
81-
self.transactions.append((self._doStoreFile, (name, )))
8286

87+
# save off the commit action
88+
self.transactions.append((self._doStoreFile, (classname, nodeid,
89+
property)))
8390

8491
def getfile(self, classname, nodeid, property):
8592
'''Get the content of the file in the database.
@@ -88,6 +95,7 @@ def getfile(self, classname, nodeid, property):
8895
try:
8996
return open(filename, 'rb').read()
9097
except:
98+
# now try the temp pre-commit filename
9199
try:
92100
return open(filename+'.tmp', 'rb').read()
93101
except:
@@ -101,15 +109,36 @@ def numfiles(self):
101109
files_dir = os.path.join(self.dir, 'files')
102110
return files_in_dir(files_dir)
103111

104-
def _doStoreFile(self, name, **databases):
112+
def _doStoreFile(self, classname, nodeid, property, **databases):
105113
'''Store the file as part of a transaction commit.
106114
'''
115+
# determine the name of the file to write to
116+
name = self.filename(classname, nodeid, property)
117+
107118
# the file is currently ".tmp" - move it to its real name to commit
108119
os.rename(name+".tmp", name)
109-
self.indexer.add_file(name)
110-
self.indexer.save_index()
120+
121+
# return the classname, nodeid so we reindex this content
122+
return (classname, nodeid)
123+
124+
def _rollbackStoreFile(self, classname, nodeid, property, **databases):
125+
'''Remove the temp file as a part of a rollback
126+
'''
127+
# determine the name of the file to delete
128+
name = self.filename(classname, nodeid, property)
129+
if os.path.exists(name+".tmp"):
130+
os.remove(name+".tmp")
111131

112132
# $Log: not supported by cvs2svn $
133+
# Revision 1.5 2002/07/08 06:58:15 richard
134+
# cleaned up the indexer code:
135+
# - it splits more words out (much simpler, faster splitter)
136+
# - removed code we'll never use (roundup.roundup_indexer has the full
137+
# implementation, and replaces roundup.indexer)
138+
# - only index text/plain and rfc822/message (ideas for other text formats to
139+
# index are welcome)
140+
# - added simple unit test for indexer. Needs more tests for regression.
141+
#
113142
# Revision 1.4 2002/06/19 03:07:19 richard
114143
# Moved the file storage commit into blobfiles where it belongs.
115144
#

roundup/hyperdb.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
# $Id: hyperdb.py,v 1.70 2002-06-27 12:06:20 gmcm Exp $
18+
# $Id: hyperdb.py,v 1.71 2002-07-09 03:02:52 richard Exp $
1919

2020
__doc__ = """
2121
Hyperdatabase implementation, especially field types.
@@ -56,6 +56,8 @@ def traceMark():
5656
#
5757
class String:
5858
"""An object designating a String property."""
59+
def __init__(self, indexme='no'):
60+
self.indexme = indexme == 'yes'
5961
def __repr__(self):
6062
' more useful for dumps '
6163
return '<%s>'%self.__class__
@@ -157,6 +159,10 @@ def __init__(self, config, journaltag=None):
157159
"""
158160
raise NotImplementedError
159161

162+
def post_init(self):
163+
"""Called once the schema initialisation has finished."""
164+
raise NotImplementedError
165+
160166
def __getattr__(self, classname):
161167
"""A convenient way of calling self.getclass(classname)."""
162168
raise NotImplementedError
@@ -1111,6 +1117,16 @@ def addprop(self, **properties):
11111117
raise ValueError, key
11121118
self.properties.update(properties)
11131119

1120+
def index(self, nodeid):
1121+
'''Add (or refresh) the node to search indexes
1122+
'''
1123+
# find all the String properties that have indexme
1124+
for prop, propclass in self.getprops().items():
1125+
if isinstance(propclass, String) and propclass.indexme:
1126+
# and index them under (classname, nodeid, property)
1127+
self.db.indexer.add_text((self.classname, nodeid, prop),
1128+
str(self.get(nodeid, prop)))
1129+
11141130
# XXX not in spec
11151131
class Node:
11161132
''' A convenience wrapper for the given node
@@ -1169,6 +1185,9 @@ def Choice(name, db, *options):
11691185

11701186
#
11711187
# $Log: not supported by cvs2svn $
1188+
# Revision 1.70 2002/06/27 12:06:20 gmcm
1189+
# Improve an error message.
1190+
#
11721191
# Revision 1.69 2002/06/17 23:15:29 richard
11731192
# Can debug to stdout now
11741193
#

0 commit comments

Comments
 (0)