Skip to content

Commit bfb0b86

Browse files
author
Richard Jones
committed
force sqlite3 in py2.6+ to treat our stored text as UTF-8
1 parent 24ac24e commit bfb0b86

File tree

3 files changed

+9
-4
lines changed

3 files changed

+9
-4
lines changed

roundup/anypy/TODO.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,4 @@ Python compatibility TODO
2222
MimeWriter.MimeWrite.addheader
2323
-> email.Message.Message.add_header (2.3)
2424

25-
- test.test_sqlite.sqliteDBTest.testStringUnicode fails
26-
2725
# vim: si

roundup/backends/back_sqlite.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ def sql_open_connection(self):
108108
else:
109109
conn = sqlite.connect(db, timeout=30)
110110
conn.row_factory = sqlite.Row
111+
112+
# sqlite3 wants us to store Unicode in the db but that's not what's
113+
# been done historically and it's definitely not what the other
114+
# backends do, so we'll stick with UTF-8
115+
if sqlite_version == 3:
116+
conn.text_factory = str
117+
111118
cursor = conn.cursor()
112119
return (conn, cursor)
113120

roundup/backends/indexer_rdbms.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,8 @@ def add_text(self, identifier, text, mime_type='text/plain'):
6565

6666
# ok, find all the unique words in the text
6767
text = unicode(text, "utf-8", "replace").upper()
68-
wordlist = [w.encode("utf-8", "replace")
69-
for w in re.findall(r'(?u)\b\w{2,25}\b', text)]
68+
wordlist = [w.encode("utf-8")
69+
for w in re.findall(r'(?u)\b\w{2,25}\b', text)]
7070
words = set()
7171
for word in wordlist:
7272
if self.is_stopword(word): continue

0 commit comments

Comments
 (0)