Skip to content

Commit 3afb6a0

Browse files
committed
Handle the issue in _decode_header by decoding only when decode_header returns bytes.
1 parent 32ef6cd commit 3afb6a0

File tree

2 files changed

+16
-15
lines changed

2 files changed

+16
-15
lines changed

roundup/anypy/email_.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,6 @@
22
import binascii
33
import email
44
from email import quoprimime, base64mime
5-
6-
from roundup.anypy.strings import bs2b
75
from email import charset as _charset
86

97
if str == bytes:
@@ -48,12 +46,11 @@ def decode_header(header):
4846
"""
4947
# If it is a Header object, we can just return the encoded chunks.
5048
if hasattr(header, '_chunks'):
51-
# no bs2b here. _charset._encode should return bytes
5249
return [(_charset._encode(string, str(charset)), str(charset))
5350
for string, charset in header._chunks]
5451
# If no encoding, just return the header with no charset.
5552
if not ecre.search(header):
56-
return [(bs2b(header), None)]
53+
return [(header, None)]
5754
# First step is to parse all the encoded parts into triplets of the form
5855
# (encoded_string, encoding, charset). For unencoded strings, the last
5956
# two parts will be None.
@@ -117,14 +114,14 @@ def decode_header(header):
117114
last_word = word
118115
last_charset = charset
119116
elif charset != last_charset:
120-
collapsed.append((bs2b(last_word), last_charset))
117+
collapsed.append((last_word, last_charset))
121118
last_word = word
122119
last_charset = charset
123120
elif last_charset is None:
124121
BSPACE = b' '
125122
last_word += BSPACE + word
126123
else:
127124
last_word += word
128-
collapsed.append((bs2b(last_word), last_charset))
125+
collapsed.append((last_word, last_charset))
129126
return collapsed
130127

roundup/mailgw.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -202,15 +202,19 @@ class RoundupMessage(email.message.Message):
202202
def _decode_header(self, hdr):
203203
parts = []
204204
for part, encoding in decode_header(hdr):
205-
if encoding:
206-
part = part.decode(encoding)
207-
else:
208-
# if the encoding is unknown, try decoding with utf-8
209-
# and fallback on iso-8859-1 if that fails
210-
try:
211-
part = part.decode('utf-8')
212-
except UnicodeDecodeError:
213-
part = part.decode('iso-8859-1')
205+
# decode_header might return either bytes or unicode,
206+
# see https://bugs.python.org/issue21492
207+
# If part is bytes, try to decode it with the specified
208+
# encoding if it's provided, otherwise try utf-8 and
209+
# fallback on iso-8859-1 if that fails.
210+
if isinstance(part, bytes):
211+
if encoding:
212+
part = part.decode(encoding)
213+
else:
214+
try:
215+
part = part.decode('utf-8')
216+
except UnicodeDecodeError:
217+
part = part.decode('iso-8859-1')
214218
# RFC 2047 specifies that between encoded parts spaces are
215219
# swallowed while at the borders from encoded to non-encoded
216220
# or vice-versa we must preserve a space. Multiple adjacent

0 commit comments

Comments
 (0)