Skip to content

Commit 60ba1b4

Browse files
committed
Do not transcode binary email attachments (issue2551004).
Only transcode attachments if a charset is specified or if they have a text/* content-type.
1 parent bcb6270 commit 60ba1b4

File tree

4 files changed: 25 additions and 25 deletions.

4 files changed: 25 additions and 25 deletions.

roundup/mailgw.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -241,7 +241,8 @@ def get_body(self):
241241

242242
if content is not None:
243243
charset = self.get_content_charset()
244-
content = u2s(content.decode(charset or 'iso8859-1', 'replace'))
244+
if charset or self.get_content_maintype() == 'text':
245+
content = u2s(content.decode(charset or 'iso8859-1', 'replace'))
245246

246247
return content
247248

test/test_mailgw.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -832,7 +832,6 @@ def testNewIssueNoAuthorEmail(self):
832832
--001485f339f8f361fb049188dbba--
833833
'''
834834

835-
@pytest.mark.xfail
836835
def testOctetStreamTranscoding(self):
837836
self.doNewIssue()
838837
self._handle_mail(self.octetstream_msg)
@@ -841,13 +840,13 @@ def testOctetStreamTranscoding(self):
841840
msg = self.db.msg.getnode (messages[-1])
842841
assert(len(msg.files) == 1)
843842
names = {0 : 'testfile'}
844-
content = ['''This is a file containing text
843+
content = [b'''This is a file containing text
845844
in latin-1 format \xE4\xF6\xFC\xC4\xD6\xDC\xDF
846845
''']
847846
for n, id in enumerate (msg.files):
848847
f = self.db.file.getnode (id)
849848
self.assertEqual(f.name, names.get (n, 'unnamed'))
850-
self.assertEqual(f.content, content [n])
849+
self.assertEqual(f.binary_content, content [n])
851850

852851
def testMultipartKeepAlternatives(self):
853852
self.doNewIssue()

test/test_mailgw_roundupmessage.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@ def test_get_body_base64(self):
126126
dGVzdCBlbmNvZGVkIG1lc3NhZ2U=
127127
""")
128128

129-
self.assertEqual(msg.get_body(), 'test encoded message')
129+
self.assertEqual(msg.get_body(), b'test encoded message')
130130

131131

132132
class AsAttachmentRoundupMessageTests(TestCase):
@@ -153,7 +153,7 @@ def test_octet_stream(self):
153153
self.assertEqual(
154154
msg.as_attachment(),
155155
('message.dat', 'application/octet-stream',
156-
'test encoded message'))
156+
b'test encoded message'))
157157

158158
def test_rfc822(self):
159159
msg = message_from_string("""
@@ -221,7 +221,7 @@ def test_multipart_mixed(self):
221221

222222
self.assertEqual(msg.extract_content(), (
223223
'foo\n',
224-
[('foo.pdf', 'application/pdf', 'foo\n')],
224+
[('foo.pdf', 'application/pdf', b'foo\n')],
225225
False
226226
))
227227

@@ -257,11 +257,11 @@ def test_deep_multipart_alternative(self):
257257

258258
self.assertEqual(msg.extract_content(), (
259259
'foo2\n', [
260-
('foo.pdf', 'application/pdf', 'foo\n'),
260+
('foo.pdf', 'application/pdf', b'foo\n'),
261261
('foo.txt', 'text/plain', 'foo\n'),
262262
('foo.html', 'text/html', '<html>foo</html>\n'),
263263
('foo3.txt', 'text/plain', 'foo3\n'),
264-
('foo2.pdf', 'application/pdf', 'foo2\n'),
264+
('foo2.pdf', 'application/pdf', b'foo2\n'),
265265
],
266266
False
267267
))
@@ -286,7 +286,7 @@ def test_deep_multipart_alternative_ignore(self):
286286
self.assertEqual(msg.extract_content(ignore_alternatives=True), (
287287
'foo2\n', [
288288
('foo3.txt', 'text/plain', 'foo3\n'),
289-
('foo2.pdf', 'application/pdf', 'foo2\n'),
289+
('foo2.pdf', 'application/pdf', b'foo2\n'),
290290
],
291291
False
292292
))
@@ -314,7 +314,7 @@ def test_signed_attachemts(self):
314314

315315
self.assertEqual(msg.extract_content(), (
316316
'foo\n',
317-
[('foo.pdf', 'application/pdf', 'foo\n')],
317+
[('foo.pdf', 'application/pdf', b'foo\n')],
318318
False
319319
))
320320

@@ -328,7 +328,7 @@ def test_attached_signature(self):
328328

329329
self.assertEqual(msg.extract_content(), (
330330
'foo\n',
331-
[('foo.gpg', 'application/pgp-signature', 'foo\n')],
331+
[('foo.gpg', 'application/pgp-signature', b'foo\n')],
332332
False
333333
))
334334

test/test_multipart.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -186,7 +186,7 @@ def testMultipartMixed(self):
186186
text/plain
187187
application/pdf""",
188188
('foo\n',
189-
[('foo.pdf', 'application/pdf', 'foo\n')], False))
189+
[('foo.pdf', 'application/pdf', b'foo\n')], False))
190190

191191
def testMultipartMixedHtml(self):
192192
# test with html conversion enabled
@@ -197,7 +197,7 @@ def testMultipartMixedHtml(self):
197197
('bar >\n',
198198
[('bar.html', 'text/html',
199199
'<html><body>bar &gt;</body></html>\n'),
200-
('foo.pdf', 'application/pdf', 'foo\n')], False),
200+
('foo.pdf', 'application/pdf', b'foo\n')], False),
201201
convert_html_with='dehtml')
202202

203203
# test with html conversion disabled
@@ -208,15 +208,15 @@ def testMultipartMixedHtml(self):
208208
(None,
209209
[('bar.html', 'text/html',
210210
'<html><body>bar &gt;</body></html>\n'),
211-
('foo.pdf', 'application/pdf', 'foo\n')], False),
211+
('foo.pdf', 'application/pdf', b'foo\n')], False),
212212
convert_html_with=False)
213213

214214
def testMultipartAlternative(self):
215215
self.TestExtraction("""
216216
multipart/alternative
217217
text/plain
218218
application/pdf
219-
""", ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')], False))
219+
""", ('foo\n', [('foo.pdf', 'application/pdf', b'foo\n')], False))
220220

221221
def testMultipartAlternativeHtml(self):
222222
self.TestExtraction("""
@@ -226,7 +226,7 @@ def testMultipartAlternativeHtml(self):
226226
('bar >\n',
227227
[('bar.html', 'text/html',
228228
'<html><body>bar &gt;</body></html>\n'),
229-
('foo.pdf', 'application/pdf', 'foo\n')], False),
229+
('foo.pdf', 'application/pdf', b'foo\n')], False),
230230
convert_html_with='dehtml')
231231

232232
self.TestExtraction("""
@@ -236,7 +236,7 @@ def testMultipartAlternativeHtml(self):
236236
(None,
237237
[('bar.html', 'text/html',
238238
'<html><body>bar &gt;</body></html>\n'),
239-
('foo.pdf', 'application/pdf', 'foo\n')], False),
239+
('foo.pdf', 'application/pdf', b'foo\n')], False),
240240
convert_html_with=False)
241241

242242
def testMultipartAlternativeHtmlText(self):
@@ -249,7 +249,7 @@ def testMultipartAlternativeHtmlText(self):
249249
('foo\n',
250250
[('bar.html', 'text/html',
251251
'<html><body>bar &gt;</body></html>\n'),
252-
('foo.pdf', 'application/pdf', 'foo\n')], False),
252+
('foo.pdf', 'application/pdf', b'foo\n')], False),
253253
convert_html_with='dehtml')
254254

255255
# text should take priority over html when text is first
@@ -261,7 +261,7 @@ def testMultipartAlternativeHtmlText(self):
261261
('foo\n',
262262
[('bar.html', 'text/html',
263263
'<html><body>bar &gt;</body></html>\n'),
264-
('foo.pdf', 'application/pdf', 'foo\n')], False),
264+
('foo.pdf', 'application/pdf', b'foo\n')], False),
265265
convert_html_with='dehtml')
266266

267267
# text should take priority over html when text is second and
@@ -274,7 +274,7 @@ def testMultipartAlternativeHtmlText(self):
274274
('foo\n',
275275
[('bar.html', 'text/html',
276276
'<html><body>bar &gt;</body></html>\n'),
277-
('foo.pdf', 'application/pdf', 'foo\n')], False),
277+
('foo.pdf', 'application/pdf', b'foo\n')], False),
278278
convert_html_with=False)
279279

280280
# text should take priority over html when text is first and
@@ -287,7 +287,7 @@ def testMultipartAlternativeHtmlText(self):
287287
('foo\n',
288288
[('bar.html', 'text/html',
289289
'<html><body>bar &gt;</body></html>\n'),
290-
('foo.pdf', 'application/pdf', 'foo\n')], False),
290+
('foo.pdf', 'application/pdf', b'foo\n')], False),
291291
convert_html_with=False)
292292

293293
def testDeepMultipartAlternative(self):
@@ -296,7 +296,7 @@ def testDeepMultipartAlternative(self):
296296
multipart/alternative
297297
text/plain
298298
application/pdf
299-
""", ('foo\n', [('foo.pdf', 'application/pdf', 'foo\n')], False))
299+
""", ('foo\n', [('foo.pdf', 'application/pdf', b'foo\n')], False))
300300

301301
def testSignedText(self):
302302
self.TestExtraction("""
@@ -312,15 +312,15 @@ def testSignedAttachments(self):
312312
application/pdf
313313
application/pgp-signature""",
314314
('foo\n',
315-
[('foo.pdf', 'application/pdf', 'foo\n')], False))
315+
[('foo.pdf', 'application/pdf', b'foo\n')], False))
316316

317317
def testAttachedSignature(self):
318318
self.TestExtraction("""
319319
multipart/mixed
320320
text/plain
321321
application/pgp-signature""",
322322
('foo\n',
323-
[('foo.gpg', 'application/pgp-signature', 'foo\n')], False))
323+
[('foo.gpg', 'application/pgp-signature', b'foo\n')], False))
324324

325325
def testMessageRfc822(self):
326326
self.TestExtraction("""

0 commit comments

Comments
 (0)