Skip to content

Commit 397cc6f

Browse files
author
Andrey Lebedev
committed
applied unicode patch
1 parent 0cd65cf commit 397cc6f

File tree

11 files changed

+218
-29
lines changed

11 files changed

+218
-29
lines changed

CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ are given with the most recent entry first.
1515
- fix StringHTMLProperty hyperlinking
1616
- added mysql backend
1717
- fixes to CGI form handling (NEEDS BACKPORTING TO 0.5)
18+
- applied unicode patch. All data is stored in utf-8. Incoming messages are
19+
converted from any encoding to utf-8, outgoing messages are encoded
20+
according to rfc2822 (sf bug 568873)
1821

1922

2023
2003-??-?? 0.5.5

roundup/backends/back_anydbm.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
#$Id: back_anydbm.py,v 1.96 2003-01-08 05:39:40 richard Exp $
18+
#$Id: back_anydbm.py,v 1.97 2003-01-15 22:17:19 kedder Exp $
1919
'''
2020
This module defines a backend that saves the hyperdatabase in a database
2121
chosen by anydbm. It is guaranteed to always be available in python
@@ -847,7 +847,7 @@ def create(self, **propvalues):
847847
(self.classname, newid, key))
848848

849849
elif isinstance(prop, String):
850-
if type(value) != type(''):
850+
if type(value) != type('') and type(value) != type(u''):
851851
raise TypeError, 'new property "%s" not a string'%key
852852

853853
elif isinstance(prop, Password):
@@ -1244,7 +1244,7 @@ class or a KeyError is raised.
12441244
journalvalues[propname] = tuple(l)
12451245

12461246
elif isinstance(prop, String):
1247-
if value is not None and type(value) != type(''):
1247+
if value is not None and type(value) != type('') and type(value) != type(u''):
12481248
raise TypeError, 'new property "%s" not a string'%propname
12491249

12501250
elif isinstance(prop, Password):

roundup/backends/rdbms_common.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# $Id: rdbms_common.py,v 1.28 2003-01-12 23:53:20 richard Exp $
1+
# $Id: rdbms_common.py,v 1.29 2003-01-15 22:17:19 kedder Exp $
22
''' Relational database (SQL) backend common code.
33
44
Basics:
@@ -1070,7 +1070,7 @@ def create(self, **propvalues):
10701070
(self.classname, newid, key))
10711071

10721072
elif isinstance(prop, String):
1073-
if type(value) != type(''):
1073+
if type(value) != type('') and type(value) != type(u''):
10741074
raise TypeError, 'new property "%s" not a string'%key
10751075

10761076
elif isinstance(prop, Password):
@@ -1432,7 +1432,7 @@ class or a KeyError is raised.
14321432
journalvalues[propname] = tuple(l)
14331433

14341434
elif isinstance(prop, String):
1435-
if value is not None and type(value) != type(''):
1435+
if value is not None and type(value) != type('') and type(value) != type(u''):
14361436
raise TypeError, 'new property "%s" not a string'%propname
14371437

14381438
elif isinstance(prop, Password):

roundup/mailgw.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,14 +73,16 @@ class node. Any parts of other types are each stored in separate files
7373
an exception, the original message is bounced back to the sender with the
7474
explanatory message given in the exception.
7575
76-
$Id: mailgw.py,v 1.106 2003-01-12 00:03:10 richard Exp $
76+
$Id: mailgw.py,v 1.107 2003-01-15 22:17:19 kedder Exp $
7777
'''
7878

7979
import string, re, os, mimetools, cStringIO, smtplib, socket, binascii, quopri
8080
import time, random, sys
8181
import traceback, MimeWriter
8282
import hyperdb, date, password
8383

84+
import rfc2822
85+
8486
SENDMAILDEBUG = os.environ.get('SENDMAILDEBUG', '')
8587

8688
class MailGWError(ValueError):
@@ -134,6 +136,10 @@ def getPart(self):
134136
s.seek(0)
135137
return Message(s)
136138

139+
def getheader(self, name, default=None):
    ''' Return the named header with any RFC 2047 encoded words decoded.

        name -- header name, looked up via mimetools.Message.getheader
        default -- value returned when the header is absent

        A missing or empty header is returned exactly as the base class
        produced it, so a caller-supplied default (for example '') is
        preserved. Any other value is passed through
        rfc2822.decode_header.
    '''
    hdr = mimetools.Message.getheader(self, name, default)
    if not hdr:
        # rfc2822.decode_header() maps every empty value to None, which
        # would silently discard a falsy default such as ''.
        return hdr
    return rfc2822.decode_header(hdr)
142+
137143
subject_re = re.compile(r'(?P<refwd>\s*\W?\s*(fw|fwd|re|aw)\W\s*)*'
138144
r'\s*(?P<quote>")?(\[(?P<classname>[^\d\s]+)(?P<nodeid>\d+)?\])?'
139145
r'\s*(?P<title>[^[]+)?"?(\[(?P<args>.+?)\])?', re.I)
@@ -339,7 +345,7 @@ def bounce_message(self, message, sendto, error,
339345
writer.addheader('MIME-Version', '1.0')
340346
part = writer.startmultipartbody('mixed')
341347
part = writer.nextpart()
342-
body = part.startbody('text/plain')
348+
body = part.startbody('text/plain; charset=utf-8')
343349
body.write('\n'.join(error))
344350

345351
# attach the original message to the returned message
@@ -377,7 +383,19 @@ def get_part_data_decoded(self,part):
377383
else:
378384
# take it as text
379385
data = part.fp.read()
380-
return data
386+
387+
# Convert the message text to utf-8
388+
charset = rfc2822.unaliasCharset(part.getparam("charset"))
389+
if charset:
390+
# Do conversion only if charset specified
391+
edata = unicode(data, charset).encode('utf-8')
392+
# Convert from dos eol to unix
393+
edata = edata.replace('\r\n', '\n')
394+
else:
395+
# Leave message content as is
396+
edata = data
397+
398+
return edata
381399

382400
def handle_message(self, message):
383401
''' message - a Message instance

roundup/rfc2822.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
import re
2+
from binascii import b2a_base64, a2b_base64
3+
4+
# Matches a single MIME "encoded word" (=?charset?encoding?text?=) and
# captures its three components as named groups.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)

# Matches text made up solely of characters that encode_header() emits
# verbatim; anything else forces the header to be encoded.
hqre = re.compile(r'^[-a-zA-Z0-9!*+/\[\]., ]+$')
15+
16+
def base64_decode(s, convert_eols=None):
    """Decode a raw base64 string.

    s -- the base64 text to decode; a false value (empty string, None)
         is returned unchanged.
    convert_eols -- if set to a string value, all canonical email
         linefeeds, e.g. "\\r\\n", in the decoded text will be converted
         to this value. os.linesep is a good choice if you are decoding
         a text attachment.

    Returns the decoded data.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.Header class for that functionality.

    Taken from 'email' module
    """
    if not s:
        return s

    dec = a2b_base64(s)
    if convert_eols:
        # The code this was copied from relied on a module-level CRLF
        # constant that does not exist here; spell the sequence out so
        # this branch no longer raises NameError.
        return dec.replace('\r\n', convert_eols)
    return dec
37+
38+
def unquote_match(match):
    """Turn a match in the form =AB to the ASCII character with value 0xab

    match -- a regular expression match whose matched text is '=' followed
             by two hexadecimal digits.

    Taken from 'email' module
    """
    s = match.group(0)
    return chr(int(s[1:3], 16))


def qp_decode(s):
    """Decode a string encoded with the MIME header `Q' encoding.

    Underscores become spaces and every =XX escape (XX being two
    hexadecimal digits) becomes the character with that value. A '='
    that is not followed by two hex digits is left untouched.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.Header class for that functionality.

    Taken from 'email' module
    """
    s = s.replace('_', ' ')
    # Only accept real hex digits: \w would also match pairs such as
    # "zz", and int(..., 16) would then raise ValueError on a malformed
    # header instead of passing the text through.
    return re.sub(r'=[0-9a-fA-F]{2}', unquote_match, s)
57+
58+
def _decode_header(header):
    """Split a header value into decoded pieces, leaving charsets alone.

    The result is a list of (decoded_string, charset) pairs, one per
    decoded part of the header. Charset is None for parts that were not
    encoded; for encoded parts it is the lower-cased character set name
    taken from the encoded word. Consecutive encoded words that share a
    charset are merged into a single pair.

    Taken from 'email' module
    """
    header = str(header)
    # Nothing encoded anywhere: hand the header back as a single piece.
    if not ecre.search(header):
        return [(header, None)]

    decoded = []
    for line in header.splitlines():
        # An individual line may still be free of encoded words.
        if not ecre.search(line):
            decoded.append((line, None))
            continue

        # split() interleaves plain text with the three captured groups:
        # [text, charset, encoding, encoded, text, charset, ...]
        fields = ecre.split(line)
        for start in range(0, len(fields), 4):
            plain_text = fields[start]
            # Whitespace-only gaps between encoded words are dropped.
            if plain_text and plain_text.strip():
                decoded.append((plain_text, None))

            word = fields[start + 1:start + 4]
            if not word:
                # Trailing plain text; no encoded word follows it.
                continue

            charset = word[0].lower()
            encoding = word[1].lower()
            encoded = word[2]
            if encoding == 'q':
                dec = qp_decode(encoded)
            elif encoding == 'b':
                dec = base64_decode(encoded)
            else:
                dec = encoded

            if decoded and decoded[-1][1] == charset:
                previous_text, previous_charset = decoded[-1]
                decoded[-1] = (previous_text + dec, previous_charset)
            else:
                decoded.append((dec, charset))
    return decoded
104+
105+
def decode_header(hdr):
    """ Decode an rfc2822 encoded header and return a utf-8 encoded string.

    hdr -- the raw header value; a false value (None, '') yields None.

    Each section returned by _decode_header() is converted from its
    declared charset, with undecodable bytes replaced. Sections without a
    charset, or whose charset name is not known to Python, are read as
    iso-8859-1.
    """
    if not hdr:
        return None
    pieces = []
    for text, charset in _decode_header(hdr):
        charset = unaliasCharset(charset) or 'iso-8859-1'
        try:
            pieces.append(unicode(text, charset, 'replace'))
        except LookupError:
            # The charset name comes straight from the incoming mail and
            # may be misspelled or unsupported; fall back rather than
            # fail on the whole message.
            pieces.append(unicode(text, 'iso-8859-1', 'replace'))
    return u''.join(pieces).encode('utf-8')
115+
116+
def encode_header(header):
    """ Encode a header value as a utf-8 `Q' encoded word when needed.

    header -- the header text. Byte strings are assumed to hold utf-8
              already (the encoded word is always labelled utf-8);
              unicode strings are converted to utf-8 first.

    Empty headers, and headers made up only of characters accepted by
    hqre, are returned unchanged. Anything else is returned in the form
    =?utf-8?q?...?=.
    """

    # Return empty headers unchanged
    if not header:
        return header

    # Return the plain header if it contains nothing that needs encoding
    if hqre.match(header):
        return header

    # Work on utf-8 bytes so every escape below is exactly one =XX pair;
    # escaping unicode code points directly would emit values above 0xFF
    # and would not match the declared charset.
    if not isinstance(header, str):
        header = header.encode('utf-8')

    charset = 'utf-8'
    quoted = []
    for c in header:
        # Space may be represented as _ instead of =20 for readability
        if c == ' ':
            quoted.append('_')
        # These characters can be included verbatim
        elif hqre.match(c):
            quoted.append(c)
        # Otherwise, replace with hex value like =E2
        else:
            quoted.append("=%02X" % ord(c))

    return '=?%s?q?%s?=' % (charset, ''.join(quoted))
146+
147+
def unaliasCharset(charset):
    """Normalise a charset name taken from a message.

    The name is lower-cased and any "windows-" is rewritten to "cp"
    (for example "Windows-1251" becomes "cp1251"). A false value
    (None, '') yields None.
    """
    if not charset:
        return None
    lowered = charset.lower()
    return lowered.replace("windows-", 'cp')
152+
153+
def test():
    """Print the encoded form of a sample header as a quick manual check."""
    sample = "Contrary, Mary"
    print(encode_header(sample))
156+
157+
if __name__ == '__main__':
158+
test()
159+
160+
# vim: et

roundup/roundupdb.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
# $Id: roundupdb.py,v 1.77 2003-01-14 22:19:27 richard Exp $
18+
# $Id: roundupdb.py,v 1.78 2003-01-15 22:17:19 kedder Exp $
1919

2020
__doc__ = """
2121
Extending hyperdb with types specific to issue-tracking.
@@ -24,6 +24,9 @@
2424
import re, os, smtplib, socket, time, random
2525
import MimeWriter, cStringIO
2626
import base64, quopri, mimetypes
27+
28+
from rfc2822 import encode_header
29+
2730
# if available, use the 'email' module, otherwise fallback to 'rfc822'
2831
try :
2932
from email.Utils import formataddr as straddr
@@ -243,9 +246,10 @@ def send_message(self, nodeid, msgid, note, sendto, from_address=None):
243246
# create the message
244247
message = cStringIO.StringIO()
245248
writer = MimeWriter.MimeWriter(message)
246-
writer.addheader('Subject', '[%s%s] %s'%(cn, nodeid, title))
249+
writer.addheader('Subject', '[%s%s] %s'%(cn, nodeid, encode_header(title)))
247250
writer.addheader('To', ', '.join(sendto))
248-
writer.addheader('From', straddr((authname + from_tag, from_address)))
251+
writer.addheader('From', straddr((encode_header(authname) +
252+
from_tag, from_address)))
249253
writer.addheader('Reply-To', straddr((self.db.config.TRACKER_NAME,
250254
from_address)))
251255
writer.addheader('Date', time.strftime("%a, %d %b %Y %H:%M:%S +0000",
@@ -267,7 +271,7 @@ def send_message(self, nodeid, msgid, note, sendto, from_address=None):
267271
part = writer.startmultipartbody('mixed')
268272
part = writer.nextpart()
269273
part.addheader('Content-Transfer-Encoding', 'quoted-printable')
270-
body = part.startbody('text/plain')
274+
body = part.startbody('text/plain; charset=utf-8')
271275
body.write(content_encoded)
272276
for fileid in message_files:
273277
name = files.get(fileid, 'name')
@@ -295,7 +299,7 @@ def send_message(self, nodeid, msgid, note, sendto, from_address=None):
295299
writer.lastpart()
296300
else:
297301
writer.addheader('Content-Transfer-Encoding', 'quoted-printable')
298-
body = writer.startbody('text/plain')
302+
body = writer.startbody('text/plain; charset=utf-8')
299303
body.write(content_encoded)
300304

301305
# now try to send the message

roundup/templates/classic/html/_generic.help

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
<html>
22
<head>
33
<link rel="stylesheet" type="text/css" href="_file/style.css">
4+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8;">
45
</head>
56
<body class="body" marginwidth="0" marginheight="0">
67

roundup/templates/classic/html/page

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
<html metal:define-macro="icing">
22
<head>
33
<title metal:define-slot="head_title">title goes here</title>
4+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8;">
45

56
<link rel="stylesheet" type="text/css" href="_file/style.css">
67

roundup/templates/minimal/html/_generic.help

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
<html>
22
<head>
3+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8;">
34
<link rel="stylesheet" type="text/css" href="_file/style.css">
45
</head>
56
<body class="body" marginwidth="0" marginheight="0">

roundup/templates/minimal/html/page

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
<html metal:define-macro="icing">
22
<head>
33
<title metal:define-slot="head_title">title goes here</title>
4+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8;">
45

56
<link rel="stylesheet" type="text/css" href="_file/style.css">
67

0 commit comments

Comments
 (0)