Skip to content

Commit 7b426cd

Browse files
committed
Avoid errors indexing binary uploads with Python 3.
If you upload a binary file for a FileClass whose content property is set to be indexed (the default), an error of the form "'utf-8' codec can't decode byte 0x89 in position 0: invalid start byte" can occur when the code attempts to index the content of that file. (This error occurs after the creation of the file, and of any issue etc. created at the same time, has already been committed; the page returned gives the impression that the creation failed, but that's not the case.) The indexing itself only happens for text/plain files, but that check is in the indexers themselves, after this error occurs (and it's entirely possible that a text/plain upload could actually have some binary or non-UTF-8 content). The underlying problem is that, under Python 3, bytes objects holding the binary contents get converted to str, with resulting errors when they are not in fact UTF-8 text. This patch makes the places that might try indexing binary content do the conversion to strings, for Python 3, with errors='ignore', so that at least no such exception occurs (and if the file is not text/plain, the results of the conversion will then get discarded in the indexers).
1 parent 1fadabe commit 7b426cd

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

roundup/backends/back_anydbm.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2227,9 +2227,12 @@ def set(self, itemid, **propvalues):
22272227
# store and possibly index
22282228
self.db.storefile(self.classname, itemid, None, bs2b(content))
22292229
if self.properties['content'].indexme:
2230+
index_content = content
2231+
if bytes != str and isinstance(content, bytes):
2232+
index_content = content.decode('utf-8', errors='ignore')
22302233
mime_type = self.get(itemid, 'type', self.default_mime_type)
22312234
self.db.indexer.add_text((self.classname, itemid, 'content'),
2232-
content, mime_type)
2235+
index_content, mime_type)
22332236
propvalues['content'] = content
22342237

22352238
# fire reactors
@@ -2245,8 +2248,12 @@ def index(self, nodeid):
22452248
for prop, propclass in self.getprops().items():
22462249
if prop == 'content' and propclass.indexme:
22472250
mime_type = self.get(nodeid, 'type', self.default_mime_type)
2251+
index_content = self.get(nodeid, 'binary_content')
2252+
if bytes != str and isinstance(index_content, bytes):
2253+
index_content = index_content.decode('utf-8',
2254+
errors='ignore')
22482255
self.db.indexer.add_text((self.classname, nodeid, 'content'),
2249-
str(self.get(nodeid, 'content')), mime_type)
2256+
index_content, mime_type)
22502257
elif isinstance(propclass, hyperdb.String) and propclass.indexme:
22512258
# index them under (classname, nodeid, property)
22522259
try:

roundup/backends/rdbms_common.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3052,8 +3052,11 @@ def create(self, **propvalues):
30523052

30533053
# and index!
30543054
if self.properties['content'].indexme:
3055+
index_content = content
3056+
if bytes != str and isinstance(content, bytes):
3057+
index_content = content.decode('utf-8', errors='ignore')
30553058
self.db.indexer.add_text((self.classname, newid, 'content'),
3056-
content, mime_type)
3059+
index_content, mime_type)
30573060

30583061
# store off the content as a file
30593062
self.db.storefile(self.classname, newid, None, bs2b(content))
@@ -3105,8 +3108,11 @@ def set(self, itemid, **propvalues):
31053108
self.db.storefile(self.classname, itemid, None, bs2b(content))
31063109
if self.properties['content'].indexme:
31073110
mime_type = self.get(itemid, 'type', self.default_mime_type)
3111+
index_content = content
3112+
if bytes != str and isinstance(content, bytes):
3113+
index_content = content.decode('utf-8', errors='ignore')
31083114
self.db.indexer.add_text((self.classname, itemid, 'content'),
3109-
content, mime_type)
3115+
index_content, mime_type)
31103116
propvalues['content'] = content
31113117

31123118
# fire reactors
@@ -3122,8 +3128,12 @@ def index(self, nodeid):
31223128
for prop, propclass in self.getprops().items():
31233129
if prop == 'content' and propclass.indexme:
31243130
mime_type = self.get(nodeid, 'type', self.default_mime_type)
3131+
index_content = self.get(nodeid, 'binary_content')
3132+
if bytes != str and isinstance(index_content, bytes):
3133+
index_content = index_content.decode('utf-8',
3134+
errors='ignore')
31253135
self.db.indexer.add_text((self.classname, nodeid, 'content'),
3126-
str(self.get(nodeid, 'content')), mime_type)
3136+
index_content, mime_type)
31273137
elif isinstance(propclass, hyperdb.String) and propclass.indexme:
31283138
# index them under (classname, nodeid, property)
31293139
try:

0 commit comments

Comments
 (0)