Skip to content

Commit 2ff9300

Browse files
committed
1 parent 770c226 commit 2ff9300

File tree

3 files changed

+57
-3
lines changed

3 files changed

+57
-3
lines changed

roundup/anypy/email_.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
import email
44
from email import quoprimime, base64mime
55

6+
from roundup.anypy.strings import bs2b
7+
from email import charset as _charset
8+
69
if str == bytes:
710
message_from_bytes = email.message_from_string
811
message_from_binary_file = email.message_from_file
@@ -45,11 +48,12 @@ def decode_header(header):
4548
"""
4649
# If it is a Header object, we can just return the encoded chunks.
4750
if hasattr(header, '_chunks'):
51+
# No bs2b needed here: _charset._encode should already return bytes.
4852
return [(_charset._encode(string, str(charset)), str(charset))
4953
for string, charset in header._chunks]
5054
# If no encoding, just return the header with no charset.
5155
if not ecre.search(header):
52-
return [(header, None)]
56+
return [(bs2b(header), None)]
5357
# First step is to parse all the encoded parts into triplets of the form
5458
# (encoded_string, encoding, charset). For unencoded strings, the last
5559
# two parts will be None.
@@ -113,14 +117,14 @@ def decode_header(header):
113117
last_word = word
114118
last_charset = charset
115119
elif charset != last_charset:
116-
collapsed.append((last_word, last_charset))
120+
collapsed.append((bs2b(last_word), last_charset))
117121
last_word = word
118122
last_charset = charset
119123
elif last_charset is None:
120124
BSPACE = b' '
121125
last_word += BSPACE + word
122126
else:
123127
last_word += word
124-
collapsed.append((last_word, last_charset))
128+
collapsed.append((bs2b(last_word), last_charset))
125129
return collapsed
126130

roundup/mailgw.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,13 @@ def _decode_header(self, hdr):
204204
for part, encoding in decode_header(hdr):
205205
if encoding:
206206
part = part.decode(encoding)
207+
else:
208+
# if the encoding is unknown, try decoding with utf-8
209+
# and fall back to iso-8859-1 if that fails
210+
try:
211+
part = part.decode('utf-8')
212+
except UnicodeDecodeError:
213+
part = part.decode('iso-8859-1')
207214
# RFC 2047 specifies that between encoded parts spaces are
208215
# swallowed while at the borders from encoded to non-encoded
209216
# or vice-versa we must preserve a space. Multiple adjacent

test/test_mailgw_roundupmessage.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,34 @@ class HeaderRoundupMessageTests(TestCase):
7575
This is a test submission of a new issue.
7676
""")
7777

78+
# From line has a null/empty encoding spec
79+
# to trigger failure in mailgw.py:RoundupMessage::_decode_header
80+
bad_msg_utf8 = message_from_string("""
81+
Content-Type: text/plain;
82+
charset="iso-8859-1"
83+
From: =??b?SOKCrGxsbw=====?= <[email protected]>
84+
To: Issue Tracker <[email protected]>
85+
Cc: =?utf8?b?SOKCrGxsbw==?= <[email protected]>,
86+
Some User <[email protected]>
87+
Message-Id: <dummy_test_message_id>
88+
Subject: [issue] Testing...
89+
90+
This is a test submission of a new issue.
91+
""")
92+
93+
bad_msg_iso_8859_1 = message_from_string("""
94+
Content-Type: text/plain;
95+
charset="iso-8859-1"
96+
From: =??q?\x80SOKCrGxsbw=====?= <[email protected]>
97+
To: Issue Tracker <[email protected]>
98+
Cc: =?utf8?b?SOKCrGxsbw==?= <[email protected]>,
99+
Some User <[email protected]>
100+
Message-Id: <dummy_test_message_id>
101+
Subject: [issue] Testing...
102+
103+
This is a test submission of a new issue.
104+
""")
105+
78106
def test_get_plain_header(self):
79107
self.assertEqual(
80108
self.msg.get_header('to'),
@@ -85,6 +113,21 @@ def test_get_encoded_header(self):
85113
self.msg.get_header('from'),
86114
'H€llo <[email protected]>')
87115

116+
# issue2551008: a null encoding causes a crash.
117+
self.assertEqual(
118+
self.bad_msg_utf8.get_header('from'),
119+
'H€llo <[email protected]>')
120+
121+
# the decoded value is not what the user wanted,
122+
# but they should have created a valid header
123+
# if they wanted the right outcome...
124+
self.assertIn(
125+
self.bad_msg_iso_8859_1.get_header('from'),
126+
(
127+
'\xc2\x80SOKCrGxsbw===== <[email protected]>', # python 2
128+
'\x80SOKCrGxsbw===== <[email protected]>' # python 3
129+
))
130+
88131
def test_get_address_list(self):
89132
self.assertEqual(self.msg.get_address_list('cc'), [
90133
('H€llo', '[email protected]'),

0 commit comments

Comments
 (0)