Skip to content

Commit 25186a0

Browse files
committed
Python 3 preparation: unicode.
This patch introduces roundup/anypy/strings.py, which has a comment explaining the string representations generally used and common functions to handle the required conversions. Places in the code that explicitly reference the "unicode" type / built-in function are generally changed to use the new functions (or, in a few places where those new functions don't seem to fit well, other approaches such as references to type(u'') or use of the codecs module). This patch does not generally attempt to address text conversions in any places not currently referencing the "unicode" type (although scripts/import_sf.py is made to use binary I/O in places as fixing the "unicode" reference didn't seem coherent otherwise).
1 parent c4979fc commit 25186a0

19 files changed

+134
-59
lines changed

roundup/anypy/strings.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# Roundup represents text internally using the native Python str type.
2+
# In Python 3, these are Unicode strings. In Python 2, these are
3+
# encoded using UTF-8, and the Python 2 unicode type is only used in a
4+
# few places, generally for interacting with external modules
5+
# requiring that type to be used.
6+
7+
import sys
8+
# True when running under Python 3 or later; the conversion helpers in this
# module branch on this flag to pick the right string handling.
_py3 = sys.version_info >= (3,)
9+
10+
def b2s(b):
    """Turn a UTF-8 encoded bytes object into the internal string format.

    On Python 3 the bytes are decoded as UTF-8. On Python 2 the argument
    is returned unchanged.
    """
    if not _py3:
        return b
    return b.decode('utf-8')
16+
17+
def s2b(s):
    """Turn an internal-format string into UTF-8 encoded bytes.

    On Python 3 the string is encoded as UTF-8. On Python 2 the argument
    is returned unchanged.
    """
    if not _py3:
        return s
    return s.encode('utf-8')
23+
24+
def s2u(s, errors='strict'):
    """Turn an internal-format string into a Unicode string.

    On Python 3 the argument is returned as-is and ``errors`` is not
    consulted. On Python 2 the string is decoded as UTF-8, with
    ``errors`` passed through as the decoding error handler.
    """
    if _py3:
        return s
    return unicode(s, 'utf-8', errors)
30+
31+
def u2s(u):
    """Turn a Unicode string into the internal string format.

    On Python 3 the argument is returned as-is. On Python 2 it is
    encoded to UTF-8.
    """
    if _py3:
        return u
    return u.encode('utf-8')
37+
38+
def us2u(s, errors='strict'):
    """Turn a string or Unicode string into a Unicode string.

    On Python 3, and for values that are already ``unicode`` on Python 2,
    the argument is returned unchanged. Any other value on Python 2 is
    decoded as UTF-8, with ``errors`` as the decoding error handler.
    """
    # The _py3 test must come first: the name ``unicode`` is only looked up
    # when running on Python 2.
    if _py3 or isinstance(s, unicode):
        return s
    return unicode(s, 'utf-8', errors)
47+
48+
def us2s(u):
    """Turn a string or Unicode string into the internal string format.

    Only a Python 2 ``unicode`` value is converted (encoded to UTF-8);
    every other value is returned unchanged.
    """
    # ``unicode`` is only looked up when not running on Python 3.
    if not _py3 and isinstance(u, unicode):
        return u.encode('utf-8')
    return u
57+
58+
def uany2s(u):
    """Turn a Unicode string or any other object into the internal format.

    A Python 2 ``unicode`` value is encoded to UTF-8. Anything else,
    including every value on Python 3, is passed to str().
    """
    # ``unicode`` is only looked up when not running on Python 3.
    if not _py3 and isinstance(u, unicode):
        return u.encode('utf-8')
    return str(u)
69+
70+
def is_us(s):
    """Report whether an object is a string or a Unicode string.

    On Python 3 only ``str`` is checked. On Python 2 both ``str`` and
    ``unicode`` are accepted.
    """
    # ``unicode`` is only looked up on the Python 2 side of the conditional.
    text_types = (str,) if _py3 else (str, unicode)
    return isinstance(s, text_types)

roundup/backends/back_sqlite.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from roundup import hyperdb, date, password
1414
from roundup.backends import rdbms_common
1515
from roundup.backends.sessions_dbm import Sessions, OneTimeKeys
16+
from roundup.anypy.strings import uany2s
1617

1718
sqlite_version = None
1819
try:
@@ -85,7 +86,7 @@ class Database(rdbms_common.Database):
8586
hyperdb.Multilink : lambda x: x, # used in journal marshalling
8687
}
8788
sql_to_hyperdb_value = {
88-
hyperdb.String : lambda x: isinstance(x, unicode) and x.encode('utf8') or str(x),
89+
hyperdb.String : uany2s,
8990
hyperdb.Date : lambda x: date.Date(str(x)),
9091
hyperdb.Link : str, # XXX numeric ids
9192
hyperdb.Interval : date.Interval,

roundup/backends/indexer_rdbms.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import re
66

77
from roundup.backends.indexer_common import Indexer as IndexerBase
8+
from roundup.anypy.strings import us2u, u2s
89

910
class Indexer(IndexerBase):
1011
def __init__(self, db):
@@ -61,10 +62,9 @@ def add_text(self, identifier, text, mime_type='text/plain'):
6162
self.db.cursor.execute(sql, (id, ))
6263

6364
# ok, find all the unique words in the text
64-
if not isinstance(text, unicode):
65-
text = unicode(text, "utf-8", "replace")
65+
text = us2u(text, "replace")
6666
text = text.upper()
67-
wordlist = [w.encode("utf-8")
67+
wordlist = [u2s(w)
6868
for w in re.findall(r'(?u)\b\w{%d,%d}\b'
6969
% (self.minlength, self.maxlength), text)]
7070
words = set()

roundup/backends/indexer_whoosh.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from whoosh import fields, qparser, index, query, analysis
66

77
from roundup.backends.indexer_common import Indexer as IndexerBase
8+
from roundup.anypy.strings import us2u
89

910
class Indexer(IndexerBase):
1011
def __init__(self, db):
@@ -78,8 +79,7 @@ def add_text(self, identifier, text, mime_type='text/plain'):
7879
if not text:
7980
text = u''
8081

81-
if not isinstance(text, unicode):
82-
text = unicode(text, "utf-8", "replace")
82+
text = us2u(text, "replace")
8383

8484
# We use the identifier twice: once in the actual "text" being
8585
# indexed so we can search on it, and again as the "data" being

roundup/backends/rdbms_common.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
from roundup.date import Range
7070

7171
from roundup.backends.back_anydbm import compile_expression
72+
from roundup.anypy.strings import us2s
7273

7374

7475
# dummy value meaning "argument not passed"
@@ -2944,8 +2945,7 @@ def import_list(self, propnames, proplist):
29442945
elif isinstance(prop, hyperdb.Password):
29452946
value = password.Password(encrypted=value)
29462947
elif isinstance(prop, String):
2947-
if isinstance(value, unicode):
2948-
value = value.encode('utf8')
2948+
value = us2s(value)
29492949
if not isinstance(value, str):
29502950
raise TypeError('new property "%(propname)s" not a '
29512951
'string: %(value)r'%locals())

roundup/cgi/PageTemplates/TALES.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ def evaluateText(self, expr):
231231
text = self.evaluate(expr)
232232
if text is Default or text is None:
233233
return text
234-
if isinstance(text, unicode):
234+
if isinstance(text, type(u'')):
235235
return text
236236
else:
237237
return ustr(text)

roundup/cgi/TranslationService.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,12 @@
1616
from roundup import i18n
1717
from roundup.cgi.PageTemplates import Expressions, PathIterator, TALES
1818
from roundup.cgi.TAL import TALInterpreter
19+
from roundup.anypy.strings import us2u, u2s
1920

2021
### Translation classes
2122

2223
class TranslationServiceMixin:
2324

24-
OUTPUT_ENCODING = "utf-8"
25-
2625
def translate(self, domain, msgid, mapping=None,
2726
context=None, target_language=None, default=None
2827
):
@@ -32,18 +31,15 @@ def translate(self, domain, msgid, mapping=None,
3231
return _msg
3332

3433
def gettext(self, msgid):
35-
if not isinstance(msgid, unicode):
36-
msgid = unicode(msgid, 'utf8')
34+
msgid = us2u(msgid)
3735
msgtrans=self.ugettext(msgid)
38-
return msgtrans.encode(self.OUTPUT_ENCODING)
36+
return u2s(msgtrans)
3937

4038
def ngettext(self, singular, plural, number):
41-
if not isinstance(singular, unicode):
42-
singular = unicode(singular, 'utf8')
43-
if not isinstance(plural, unicode):
44-
plural = unicode(plural, 'utf8')
39+
singular = us2u(singular)
40+
plural = us2u(plural)
4541
msgtrans=self.ungettext(singular, plural, number)
46-
return msgtrans.encode(self.OUTPUT_ENCODING)
42+
return u2s(msgtrans)
4743

4844
class TranslationService(TranslationServiceMixin, i18n.RoundupTranslations):
4945
pass
@@ -55,8 +51,7 @@ def ugettext(self, message):
5551
return self._fallback.ugettext(message)
5652
# Sometimes the untranslatable message is a UTF-8 encoded string
5753
# (thanks to PageTemplate's internals).
58-
if not isinstance(message, unicode):
59-
return unicode(message, 'utf8')
54+
message = us2u(message)
6055
return message
6156

6257
### TAL patching

roundup/cgi/engine_chameleon.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import chameleon
77

88
from roundup.cgi.templating import StringIO, context, TALLoaderBase
9+
from roundup.anypy.strings import s2u
910

1011
class Loader(TALLoaderBase):
1112
def __init__(self, dir):
@@ -27,7 +28,7 @@ def render(self, client, classname, request, **options):
2728
def translate(msgid, domain=None, mapping=None, default=None):
2829
result = client.translator.translate(domain, msgid,
2930
mapping=mapping, default=default)
30-
return unicode(result, client.translator.OUTPUT_ENCODING)
31+
return s2u(result)
3132

3233
output = self._pt.render(None, translate, **c)
3334
return output.encode(client.charset)

roundup/cgi/engine_jinja2.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
# http://jinja.pocoo.org/docs/api/#loaders
4141

4242
from roundup.cgi.templating import context, LoaderBase, TemplateBase
43+
from roundup.anypy.strings import s2u
4344

4445
class Jinja2Loader(LoaderBase):
4546
def __init__(self, dir):
@@ -59,8 +60,7 @@ def __init__(self, dir):
5960
# The automatic conversion will assume 'ascii' and fail sometime.
6061
# Analysed with roundup 1.5.0 and jinja 2.7.1. See issue2550811.
6162
self._env.filters["u"] = lambda s: \
62-
unicode(s(), "utf-8") if type(s) == MethodType \
63-
else unicode(s, "utf-8")
63+
s2u(s()) if type(s) == MethodType else s2u(s)
6464

6565
def check(self, tplname):
6666
#print tplname

roundup/cgi/templating.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from roundup import hyperdb, date, support
3030
from roundup import i18n
3131
from roundup.i18n import _
32+
from roundup.anypy.strings import is_us, us2s, s2u, u2s
3233

3334
from .KeywordsExpr import render_keywords_expression_editor
3435

@@ -1774,7 +1775,7 @@ def field(self, labelfirst=False, y_label=None, n_label=None,
17741775
return self.plain(escape=1)
17751776

17761777
value = self._value
1777-
if isinstance(value, str) or isinstance(value, unicode):
1778+
if is_us(value):
17781779
value = value.strip().lower() in ('checked', 'yes', 'true',
17791780
'on', '1')
17801781

@@ -1827,8 +1828,7 @@ def __init__(self, client, classname, nodeid, prop, name, value,
18271828
anonymous=0, offset=None):
18281829
HTMLProperty.__init__(self, client, classname, nodeid, prop, name,
18291830
value, anonymous=anonymous)
1830-
if self._value and not (isinstance(self._value, str) or
1831-
isinstance(self._value, unicode)):
1831+
if self._value and not is_us(self._value):
18321832
self._value.setTranslator(self._client.translator)
18331833
self._offset = offset
18341834
if self._offset is None :
@@ -1910,9 +1910,9 @@ def field(self, size=30, default=None, format=_marker, popcal=True,
19101910
raise ValueError(self._('default value for '
19111911
'DateHTMLProperty must be either DateHTMLProperty '
19121912
'or string date representation.'))
1913-
elif isinstance(value, str) or isinstance(value, unicode):
1913+
elif is_us(value):
19141914
# most likely erroneous input to be passed back to user
1915-
if isinstance(value, unicode): value = value.encode('utf8')
1915+
value = us2s(value)
19161916
s = self.input(name=self._formname, value=value, size=size,
19171917
**kwargs)
19181918
if popcal:
@@ -1923,7 +1923,7 @@ def field(self, size=30, default=None, format=_marker, popcal=True,
19231923

19241924
if raw_value is None:
19251925
value = ''
1926-
elif isinstance(raw_value, str) or isinstance(raw_value, unicode):
1926+
elif is_us(raw_value):
19271927
if format is self._marker:
19281928
value = raw_value
19291929
else:
@@ -2012,7 +2012,7 @@ def __init__(self, client, classname, nodeid, prop, name, value,
20122012
anonymous=0):
20132013
HTMLProperty.__init__(self, client, classname, nodeid, prop,
20142014
name, value, anonymous)
2015-
if self._value and not isinstance(self._value, (str, unicode)):
2015+
if self._value and not is_us(self._value):
20162016
self._value.setTranslator(self._client.translator)
20172017

20182018
def plain(self, escape=0):
@@ -2967,9 +2967,9 @@ def batch(self, permission='View'):
29672967
klass = self.client.db.getclass(self.classname)
29682968
if self.search_text:
29692969
matches = self.client.db.indexer.search(
2970-
[w.upper().encode("utf-8", "replace") for w in re.findall(
2970+
[u2s(w.upper()) for w in re.findall(
29712971
r'(?u)\b\w{2,25}\b',
2972-
unicode(self.search_text, "utf-8", "replace")
2972+
s2u(self.search_text, "replace")
29732973
)], klass)
29742974
else:
29752975
matches = None

0 commit comments

Comments
 (0)