Skip to content

Commit 796b39e

Browse files
committed
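Fix Issue2550609: add_text no longer unconditionally decodes its input with unicode(text, "utf-8", "replace"); a local tryencode helper leaves unicode objects untouched.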
1 parent 3f6fabb commit 796b39e

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

roundup/backends/indexer_rdbms.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -64,10 +64,14 @@ def add_text(self, identifier, text, mime_type='text/plain'):
6464
self.db.cursor.execute(sql, (id, ))
6565

6666
# ok, find all the unique words in the text
67-
text = unicode(text, "utf-8", "replace").upper()
68-
wordlist = [w.encode("utf-8")
69-
for w in re.findall(r'(?u)\b\w{%d,%d}\b'
70-
% (self.minlength, self.maxlength), text)]
67+
def tryencode(str):
68+
if not isinstance(str, unicode):
69+
str = str.encode("utf-8", "replace")
70+
return str
71+
text = tryencode(text).upper()
72+
wordlist = [tryencode(w)
73+
for w in re.findall(r'(?u)\b\w{%d,%d}\b'
74+
% (self.minlength, self.maxlength), text)]
7175
words = set()
7276
for word in wordlist:
7377
if self.is_stopword(word): continue

0 commit comments

Comments
 (0)