Skip to content

Commit e585acd

Browse files
Author: Richard Jones
Commit message: fix reindexing in Xapian
1 parent: 79318d4; commit: e585acd

File tree

5 files changed: 54 additions and 11 deletions.

CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ Fixed:
99
- fixes in scripts/import_sf.py
1010
- fix some unicode bugs in roundup-admin import
1111
- Xapian indexer wasn't actually being used
12+
- fix reindexing in Xapian
1213
- fix indexing of message content on roundup-admin import
1314

1415

roundup/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
# $Id: __init__.py,v 1.34 2006-02-03 04:12:36 richard Exp $
18+
# $Id: __init__.py,v 1.35 2006-02-07 04:59:05 richard Exp $
1919

2020
'''Roundup - issue tracking for knowledge workers.
2121
@@ -68,6 +68,6 @@
6868
'''
6969
__docformat__ = 'restructuredtext'
7070

71-
__version__ = '1.0.1'
71+
__version__ = '1.0.2p1'
7272

7373
# vim: set filetype=python ts=4 sw=4 et si

roundup/backends/indexer_xapian.py

Lines changed: 26 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
#$Id: indexer_xapian.py,v 1.2 2006-02-06 21:00:47 richard Exp $
1+
#$Id: indexer_xapian.py,v 1.3 2006-02-07 04:59:05 richard Exp $
22
''' This implements the full-text indexer using the Xapian indexer.
33
'''
44
import re, os
@@ -47,6 +47,7 @@ def add_text(self, identifier, text, mime_type='text/plain'):
4747
''' "identifier" is (classname, itemid, property) '''
4848
if mime_type != 'text/plain':
4949
return
50+
if not text: text = ''
5051

5152
# open the database and start a transaction if needed
5253
database = self._get_database()
@@ -57,18 +58,38 @@ def add_text(self, identifier, text, mime_type='text/plain'):
5758

5859
# TODO: allow configuration of other languages
5960
stemmer = xapian.Stem("english")
61+
62+
# We use the identifier twice: once in the actual "text" being
63+
# indexed so we can search on it, and again as the "data" being
64+
# indexed so we know what we're matching when we get results
65+
identifier = '%s:%s:%s'%identifier
66+
67+
# see if the id is in the database
68+
enquire = xapian.Enquire(database)
69+
query = xapian.Query(xapian.Query.OP_AND, [identifier])
70+
enquire.set_query(query)
71+
matches = enquire.get_mset(0, 10)
72+
if matches.size(): # would it killya to implement __len__()??
73+
b = matches.begin()
74+
docid = b.get_docid()
75+
else:
76+
docid = None
77+
78+
# create the new document
6079
doc = xapian.Document()
80+
doc.set_data(identifier)
81+
doc.add_posting(identifier, 0)
6182

62-
# Xapian doesn't actually seem to care what data is put in here, so
63-
# we use it to store the text identifier.
64-
doc.set_data('%s:%s:%s'%identifier)
6583
for match in re.finditer(r'\b\w{2,25}\b', text.upper()):
6684
word = match.group(0)
6785
if self.is_stopword(word):
6886
continue
6987
term = stemmer.stem_word(word)
7088
doc.add_posting(term, match.start(0))
71-
database.add_document(doc)
89+
if docid:
90+
database.replace_document(docid, doc)
91+
else:
92+
database.add_document(doc)
7293

7394
def find(self, wordlist):
7495
'''look up all the words in the wordlist.

test/db_test_base.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
# $Id: db_test_base.py,v 1.65 2006-02-07 04:14:32 richard Exp $
18+
# $Id: db_test_base.py,v 1.66 2006-02-07 04:59:05 richard Exp $
1919

2020
import unittest, os, shutil, errno, imp, sys, time, pprint, sets
2121

@@ -735,7 +735,7 @@ def testIndexerSearching(self):
735735
# unindexed stopword
736736
self.assertEquals(self.db.indexer.search(['the'], self.db.issue), {})
737737

738-
def testReindexing(self):
738+
def testReindexingChange(self):
739739
search = self.db.indexer.search
740740
issue = self.db.issue
741741
i1 = issue.create(title="flebble plop")
@@ -750,6 +750,15 @@ def testReindexing(self):
750750
self.assertEquals(search(['plop'], issue), {i1: {}})
751751
self.assertEquals(search(['flebble'], issue), {i2: {}})
752752

753+
def testReindexingClear(self):
754+
search = self.db.indexer.search
755+
issue = self.db.issue
756+
i1 = issue.create(title="flebble plop")
757+
i2 = issue.create(title="flebble frooz")
758+
self.db.commit()
759+
self.assertEquals(search(['plop'], issue), {i1: {}})
760+
self.assertEquals(search(['flebble'], issue), {i1: {}, i2: {}})
761+
753762
# unset i1's title
754763
issue.set(i1, title="")
755764
self.db.commit()

test/test_indexer.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1919
# SOFTWARE.
2020

21-
# $Id: test_indexer.py,v 1.9 2006-02-07 04:14:32 richard Exp $
21+
# $Id: test_indexer.py,v 1.10 2006-02-07 04:59:05 richard Exp $
2222

2323
import os, unittest, shutil
2424

@@ -46,10 +46,22 @@ def test_basics(self):
4646
self.assertEqual(self.dex.find(['blah']), [('test', '2', 'foo')])
4747
self.assertEqual(self.dex.find(['blah', 'hello']), [])
4848

49-
# change
49+
def test_change(self):
50+
self.dex.add_text(('test', '1', 'foo'), 'a the hello world')
51+
self.dex.add_text(('test', '2', 'foo'), 'blah blah the world')
52+
self.assertEqual(self.dex.find(['world']), [('test', '1', 'foo'),
53+
('test', '2', 'foo')])
5054
self.dex.add_text(('test', '1', 'foo'), 'a the hello')
5155
self.assertEqual(self.dex.find(['world']), [('test', '2', 'foo')])
5256

57+
def test_clear(self):
58+
self.dex.add_text(('test', '1', 'foo'), 'a the hello world')
59+
self.dex.add_text(('test', '2', 'foo'), 'blah blah the world')
60+
self.assertEqual(self.dex.find(['world']), [('test', '1', 'foo'),
61+
('test', '2', 'foo')])
62+
self.dex.add_text(('test', '1', 'foo'), '')
63+
self.assertEqual(self.dex.find(['world']), [('test', '2', 'foo')])
64+
5365
def tearDown(self):
5466
shutil.rmtree('test-index')
5567

0 commit comments

Comments
 (0)