Handle decode_header returning either bytes or unicode (https://bugs.python.org/issue21492) in _decode_header by decoding only when it returns bytes.

ezio-melotti · ezio-melotti · commit 3afb6a0a986c · 2019-06-05T00:13:45.000+02:00
diff --git a/roundup/anypy/email_.py b/roundup/anypy/email_.py
@@ -2,8 +2,6 @@
 import binascii
 import email
 from email import quoprimime, base64mime
-
-from roundup.anypy.strings import bs2b
 from email import charset as _charset
 
 if str == bytes:
@@ -48,12 +46,11 @@ def decode_header(header):
     """
     # If it is a Header object, we can just return the encoded chunks.
     if hasattr(header, '_chunks'):
-        # no bs2b here. _charset._encode should return bytes
         return [(_charset._encode(string, str(charset)), str(charset))
                     for string, charset in header._chunks]
     # If no encoding, just return the header with no charset.
     if not ecre.search(header):
-        return [(bs2b(header), None)]
+        return [(header, None)]
     # First step is to parse all the encoded parts into triplets of the form
     # (encoded_string, encoding, charset).  For unencoded strings, the last
     # two parts will be None.
@@ -117,14 +114,14 @@ def decode_header(header):
             last_word = word
             last_charset = charset
         elif charset != last_charset:
-            collapsed.append((bs2b(last_word), last_charset))
+            collapsed.append((last_word, last_charset))
             last_word = word
             last_charset = charset
         elif last_charset is None:
             BSPACE = b' '
             last_word += BSPACE + word
         else:
             last_word += word
-    collapsed.append((bs2b(last_word), last_charset))
+    collapsed.append((last_word, last_charset))
     return collapsed
 
diff --git a/roundup/mailgw.py b/roundup/mailgw.py
@@ -202,15 +202,19 @@ class RoundupMessage(email.message.Message):
     def _decode_header(self, hdr):
         parts = []
         for part, encoding in decode_header(hdr):
-            if encoding:
-                part = part.decode(encoding)
-            else:
-                # if the encoding is unknown, try decoding with utf-8
-                # and fallback on iso-8859-1 if that fails
-                try:
-                    part = part.decode('utf-8')
-                except UnicodeDecodeError:
-                    part = part.decode('iso-8859-1')
+            # decode_header might return either bytes or unicode,
+            # see https://bugs.python.org/issue21492
+            # If part is bytes, decode it with the specified encoding
+            # when one is provided; otherwise try utf-8 and fall back
+            # to iso-8859-1 if that fails.
+            if isinstance(part, bytes):
+                if encoding:
+                    part = part.decode(encoding)
+                else:
+                    try:
+                        part = part.decode('utf-8')
+                    except UnicodeDecodeError:
+                        part = part.decode('iso-8859-1')
             # RFC 2047 specifies that between encoded parts spaces are
             # swallowed while at the borders from encoded to non-encoded
             # or vice-versa we must preserve a space. Multiple adjacent