Skip to content

Commit 68911cf

Browse files
author
Alexander Smishlajev
committed
Implemented HTTP charset negotiation.
The charset is kept in the browser cookie 'roundup_charset' and may be changed by the form parameter '@charset'. HTML rendered from templates is encoded, and form fields are decoded, according to the selected charset. For other 'text/html' output (not coming from template rendering), 'charset=utf-8' is specified in the HTTP 'Content-Type' header unless a charset is already set.
1 parent 0ea98c7 commit 68911cf

File tree

1 file changed

+99
-12
lines changed

1 file changed

+99
-12
lines changed

roundup/cgi/client.py

Lines changed: 99 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
# $Id: client.py,v 1.176 2004-05-04 05:56:54 richard Exp $
1+
# $Id: client.py,v 1.177 2004-05-11 13:32:05 a1s Exp $
22

33
"""WWW request handler (also used in the stand-alone server).
44
"""
55
__docformat__ = 'restructuredtext'
66

77
import os, os.path, cgi, StringIO, urlparse, re, traceback, mimetypes, urllib
88
import binascii, Cookie, time, random, stat, rfc822
9+
import codecs
910

1011

1112
from roundup import roundupdb, date, hyperdb, password
@@ -86,6 +87,11 @@ class Client:
8687
actually be one of either ":" or "@".
8788
'''
8889

90+
# charset used for data storage and form templates
91+
# Note: must be in lower case for comparisons!
92+
# XXX take this from instance.config?
93+
STORAGE_CHARSET = 'utf-8'
94+
8995
#
9096
# special form variables
9197
#
@@ -145,6 +151,9 @@ def __init__(self, instance, request, env, form=None):
145151
self.additional_headers = {}
146152
self.response_code = 200
147153

154+
# parse cookies (used in charset and session lookups)
155+
self.cookie = Cookie.SimpleCookie(self.env.get('HTTP_COOKIE', ''))
156+
148157
def main(self):
149158
''' Wrap the real main in a try/finally so we always close off the db.
150159
'''
@@ -187,6 +196,8 @@ def inner_main(self):
187196
self.ok_message = []
188197
self.error_message = []
189198
try:
199+
self.determine_charset()
200+
190201
# figure out the context and desired content template
191202
# do this first so we don't authenticate for static files
192203
# Note: this method opens the database as "admin" in order to
@@ -201,7 +212,7 @@ def inner_main(self):
201212
html = self.handle_action()
202213

203214
if html:
204-
self.write(html)
215+
self.write_html(html)
205216
return
206217

207218
# now render the page
@@ -215,18 +226,18 @@ def inner_main(self):
215226
self.additional_headers['Expires'] = date
216227

217228
# render the content
218-
self.write(self.renderContext())
229+
self.write_html(self.renderContext())
219230

220231
except SeriousError, message:
221-
self.write(str(message))
232+
self.write_html(str(message))
222233
except Redirect, url:
223234
# let's redirect - if the url isn't None, then we need to do
224235
# the headers, otherwise the headers have been set before the
225236
# exception was raised
226237
if url:
227238
self.additional_headers['Location'] = url
228239
self.response_code = 302
229-
self.write('Redirecting to <a href="%s">%s</a>'%(url, url))
240+
self.write_html('Redirecting to <a href="%s">%s</a>'%(url, url))
230241
except SendFile, designator:
231242
self.serve_file(designator)
232243
except SendStaticFile, file:
@@ -241,16 +252,16 @@ def inner_main(self):
241252
self.classname = self.nodeid = None
242253
self.template = ''
243254
self.error_message.append(message)
244-
self.write(self.renderContext())
255+
self.write_html(self.renderContext())
245256
except NotFound:
246257
# pass through
247258
raise
248259
except FormError, e:
249260
self.error_message.append(_('Form Error: ') + str(e))
250-
self.write(self.renderContext())
261+
self.write_html(self.renderContext())
251262
except:
252263
# everything else
253-
self.write(cgitb.html())
264+
self.write_html(cgitb.html())
254265

255266
def clean_sessions(self):
256267
"""Age sessions, remove when they haven't been used for a week.
@@ -274,6 +285,59 @@ def clean_sessions(self):
274285
sessions.set('last_clean', last_use=time.time())
275286
self.db.commit()
276287

288+
def determine_charset(self):
    """Look for client charset in the form parameters or browser cookie.

    The form parameter '@charset' takes precedence over the
    'roundup_charset' cookie.  If no charset is requested by the
    client, the storage charset (utf-8) is used.

    The selected charset is stored, lower-cased, in ``self.charset``.
    A charset name unknown to the ``codecs`` module is reported through
    ``self.error_message`` and the storage charset stays in effect.

    If the charset is found, and differs from the storage charset,
    all form fields of type 'text/plain' that are not file uploads
    are recoded in place to the storage charset.  Numeric character
    references ('&#NNN;' or '&#xHHH;') in those fields are replaced
    by the characters they stand for.
    """
    # default to storage charset
    self.charset = self.STORAGE_CHARSET
    # look for client charset
    if self.form.has_key('@charset'):
        charset = self.form['@charset'].value
    elif self.cookie.has_key('roundup_charset'):
        charset = self.cookie['roundup_charset'].value
    else:
        charset = None
    if charset:
        # make sure the charset is recognized
        try:
            codecs.lookup(charset)
        except LookupError:
            self.error_message.append(_('Unrecognized charset: %r')
                % charset)
        else:
            # lower case is required for the comparison below
            self.charset = charset.lower()

    # nothing to recode when the client already uses the storage charset
    if self.charset == self.STORAGE_CHARSET:
        return

    # if client charset is different from the storage charset,
    # recode form fields
    # XXX this requires FieldStorage from Python library.
    #   mod_python FieldStorage is not supported!
    # NOTE(review): only fields reporting type 'text/plain' are recoded;
    #   confirm that every kind of form submission reports that type.
    decoder = codecs.getdecoder(self.charset)
    encoder = codecs.getencoder(self.STORAGE_CHARSET)
    re_charref = re.compile('&#([0-9]+|x[0-9a-f]+);', re.IGNORECASE)

    def _decode_charref(matchobj):
        """Return the character for a numeric character reference."""
        num = matchobj.group(1)
        if num[0].lower() == 'x':
            uc = int(num[1:], 16)
        else:
            uc = int(num)
        try:
            return unichr(uc)
        except (ValueError, OverflowError):
            # the number is not a valid code point (the form data is
            # untrusted): keep the reference text unchanged instead of
            # aborting the whole request
            return matchobj.group(0)

    for field_name in self.form.keys():
        fields = self.form[field_name]
        # a name that occurs several times in the form is returned
        # as a list of fields, not as a single field
        if not isinstance(fields, list):
            fields = [fields]
        for field in fields:
            if field.type != 'text/plain' or field.filename:
                continue
            try:
                value = decoder(field.value)[0]
            except UnicodeError:
                # not valid in the client charset: leave the field as is
                continue
            value = re_charref.sub(_decode_charref, value)
            field.value = encoder(value)[0]
277341
def determine_user(self):
278342
''' Determine who the user is
279343
'''
@@ -292,7 +356,7 @@ def determine_user(self):
292356
pass
293357

294358
# look up the user session cookie (may override the REMOTE_USER)
295-
cookie = Cookie.SimpleCookie(self.env.get('HTTP_COOKIE', ''))
359+
cookie = self.cookie
296360
user = 'anonymous'
297361
if (cookie.has_key(self.cookie_name) and
298362
cookie[self.cookie_name].value != 'deleted'):
@@ -591,6 +655,29 @@ def write(self, content):
591655
self.header()
592656
self.request.wfile.write(content)
593657

658+
def write_html(self, content):
    """Write HTML ``content`` to the client in the selected charset.

    When the headers have not been sent yet, the 'Content-Type'
    header is set to 'text/html' with the selected charset, the
    charset cookie is queued and the headers are emitted.  The content
    is recoded from the storage charset when the two charsets differ;
    characters missing from the client charset are written as XML
    character references.
    """
    extra_headers = self.additional_headers
    if not self.headers_done:
        # the Content-Type is known for certain by now
        extra_headers['Content-Type'] = (
            'text/html; charset=%s' % self.charset)
        # Remember the charset in a cookie, unless a Set-Cookie header
        # is already queued: the session cookie set by LoginAction or
        # ConfRegoAction must be preserved.  That should be fine, as
        # the user does not perform two actions (login and charset
        # toggle) simultaneously.
        if 'Set-Cookie' not in extra_headers:
            # the charset is remembered for a year
            one_year = 86400*365
            cookie_args = (self.charset, Cookie._getdate(one_year),
                self.cookie_path)
            extra_headers['Set-Cookie'] = (
                'roundup_charset=%s; expires=%s; Path=%s;' % cookie_args)
        self.header()

    if self.charset == self.STORAGE_CHARSET:
        output = content
    else:
        # recode output from the storage charset to the client charset
        text = content.decode(self.STORAGE_CHARSET, 'replace')
        output = text.encode(self.charset, 'xmlcharrefreplace')
    self.request.wfile.write(output)
680+
594681
def setHeader(self, header, value):
595682
'''Override a header to be returned to the user's browser.
596683
'''
@@ -600,15 +687,15 @@ def header(self, headers=None, response=None):
600687
'''Put up the appropriate header.
601688
'''
602689
if headers is None:
603-
headers = {'Content-Type':'text/html'}
690+
headers = {'Content-Type':'text/html; charset=utf-8'}
604691
if response is None:
605692
response = self.response_code
606693

607694
# update with additional info
608695
headers.update(self.additional_headers)
609696

610-
if not headers.has_key('Content-Type'):
611-
headers['Content-Type'] = 'text/html'
697+
if headers.get('Content-Type', 'text/html') == 'text/html':
698+
headers['Content-Type'] = 'text/html; charset=utf-8'
612699
self.request.send_response(response)
613700
for entry in headers.items():
614701
self.request.send_header(*entry)

0 commit comments

Comments
 (0)