Skip to content

Commit 5d96fb9

Browse files
Bernhard Reiter
authored and committed
Added more indexer tests for stopwords, case-insensitivity...
...and short and long words.
1 parent 66a064b commit 5d96fb9

File tree

1 file changed

+42
-0
lines changed

1 file changed

+42
-0
lines changed

test/test_indexer.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,48 @@ def test_clear(self):
8282
self.dex.add_text(('test', '1', 'foo'), '')
8383
self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')])
8484

85+
def test_stopwords(self):
    """Test that we can find a text with a stopword in it.

    Four documents are indexed; three of them contain the stopword
    "with".  A query for the stopword together with 'world' is expected
    to return exactly the two documents that contain 'world'.
    """
    stopword = "with"
    # The upper-cased form is passed in, so the stopword check is expected
    # to match regardless of case.  assertTrue is used instead of the
    # deprecated assert_ alias, which newer unittest versions no longer
    # provide.
    self.assertTrue(self.dex.is_stopword(stopword.upper()))
    self.dex.add_text(('test', '1', 'bar'), '%s hello world' % stopword)
    self.dex.add_text(('test', '2', 'bar'), 'blah a %s world' % stopword)
    self.dex.add_text(('test', '3', 'bar'), 'blah Blub river')
    self.dex.add_text(('test', '4', 'bar'), 'blah river %s' % stopword)
    # Document 4 contains the stopword but not 'world', so it must not
    # appear in the expected result.
    self.assertSeqEqual(self.dex.find([stopword, 'world']),
                        [('test', '1', 'bar'),
                         ('test', '2', 'bar')])
96+
def test_extremewords(self):
    """Testing too short or too long words.

    A one-letter word and a 26-letter word are mixed into the indexed
    texts and into the query.  The expected results are the documents
    containing 'world' for the combined query, and nothing at all when
    searching for the 26-letter word alone.
    """
    tiny_word = "b"
    huge_word = "abcdefghijklmnopqrstuvwxyz"

    documents = [
        (('test', '1', 'a'), '%s hello world' % tiny_word),
        (('test', '2', 'a'), 'blah a %s world' % tiny_word),
        (('test', '3', 'a'), 'blah Blub river'),
        (('test', '4', 'a'), 'blah river %s %s' % (tiny_word, huge_word)),
    ]
    for identifier, text in documents:
        self.dex.add_text(identifier, text)

    # Document 4 holds both extreme words but is not expected to match.
    matches = self.dex.find([tiny_word, 'world', huge_word, tiny_word])
    self.assertSeqEqual(matches, [('test', '1', 'a'), ('test', '2', 'a')])

    nothing = self.dex.find([huge_word])
    self.assertSeqEqual(nothing, [])

    # Regression check: some faulty code indexed words of length >= 2
    # but only treated words of length >= 3 as significant, so a
    # two-letter word gets its own lookup.
    self.dex.add_text(('test', '5', 'a'),
                      'blah py %s %s' % (tiny_word, huge_word))
    self.assertSeqEqual(self.dex.find(["py"]), [('test', '5', 'a')])
115+
116+
def test_casesensitity(self):
    """Test that searches are case-insensitive.

    Two documents hold the same words in different letter case; a
    lower-case query and an upper-case query are each expected to
    return both documents.
    """
    expected = (('test', '1', 'a'), ('test', '2', 'a'))

    self.dex.add_text(expected[0], 'aaaa bbbb')
    self.dex.add_text(expected[1], 'aAaa BBBB')

    for term in ('aaaa', 'BBBB'):
        # A fresh list per call keeps each comparison independent.
        self.assertSeqEqual(self.dex.find([term]), list(expected))
126+
85127
def tearDown(self):
    """Remove the 'test-index' directory tree."""
    index_dir = 'test-index'
    shutil.rmtree(index_dir)
87129

0 commit comments

Comments
 (0)