Skip to content

Commit e0f8fb0

Browse files
committed
Make CSV import/export compatible across Python versions (also RDBMS journals) (issue 2550976, issue 2550975).
The roundup-admin export and import commands are used for migrating between different database backends. It is desirable that they should also be usable for migrations between Python 2 and Python 3, and in some cases (e.g. with the anydbm backend) this may be required. To be usable for such migrations, the format of the generated CSV files needs to be stable, meaning the same as currently used with Python 2. The export process uses repr() to produce the fields in the CSV files and eval() to convert them back to Python data structures. repr() of strings with non-ASCII characters produces different results for Python 2 and Python 3. This patch adds repr_export and eval_import functions to roundup/anypy/strings.py which provide the required operations: in Python 2 they are just repr() and eval(), but in Python 3 they do additional work so that the data representations used are compatible with Python 2. These functions are then used in the required places for export and import. repr() and eval() are also used in storing the dict of changed values in the journal for the RDBMS backends. It is similarly desirable that the database be compatible between Python 2 and Python 3, so that export and import do not need to be used for a migration between Python versions for non-anydbm backends. Thus, this patch changes rdbms_common.py in the places involved in storing journals in the database, not just in those involved in import/export. Given this patch, import/export with non-ASCII characters appears, based on some limited testing, to work across Python versions, and an instance using the sqlite backend appears to be compatible between Python versions without needing import/export, *if* the sessions/otks databases (which use anydbm) are deleted when changing Python version.
1 parent e18c620 commit e0f8fb0

File tree

5 files changed

+73
-20
lines changed

5 files changed

+73
-20
lines changed

roundup/admin.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from roundup.i18n import _
3333
from roundup.exceptions import UsageError
3434
from roundup.anypy.my_input import my_input
35+
from roundup.anypy.strings import repr_export
3536

3637
try:
3738
from UserDict import UserDict
@@ -1201,13 +1202,13 @@ class colon_separated(csv.excel):
12011202
sys.stdout.flush()
12021203
node = cl.getnode(nodeid)
12031204
exp = cl.export_list(propnames, nodeid)
1204-
lensum = sum ([len (repr(node[p])) for p in propnames])
1205+
lensum = sum ([len (repr_export(node[p])) for p in propnames])
12051206
# for a safe upper bound of field length we add
12061207
# difference between CSV len and sum of all field lengths
12071208
d = sum ([len(x) for x in exp]) - lensum
12081209
assert (d > 0)
12091210
for p in propnames:
1210-
ll = len(repr(node[p])) + d
1211+
ll = len(repr_export(node[p])) + d
12111212
if ll > max_len:
12121213
max_len = ll
12131214
writer.writerow(exp)

roundup/anypy/strings.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,51 @@ def uchr(c):
9696
return chr(c)
9797
else:
9898
return unichr(c)
99+
100+
# CSV files used for export and import represent strings in the style
101+
# used by repr in Python 2; this means that each byte of the UTF-8
102+
# representation is represented by a \x escape if not a printable
103+
# ASCII character. When such a representation is interpreted by eval
104+
# in Python 3, the effect is that the Unicode characters in the
105+
# resulting string correspond to UTF-8 bytes, so encoding the string
106+
# as ISO-8859-1 produces the correct byte-string which must then be
107+
# decoded as UTF-8 to produce the correct Unicode string. The same
108+
# representations are also used for journal storage in RDBMS
109+
# databases, so that the database can be compatible between Python 2
110+
# and Python 3.
111+
112+
def repr_export(v):
    """Return a Python-2-style representation of a value for export to CSV.

    Under Python 2 this is simply repr(v).  Under Python 3:

    * a str is passed through s2b() and the repr of the result is
      returned with its first character removed (presumably the 'b'
      prefix of a bytes literal -- s2b is defined elsewhere in this
      module);
    * a dict is rendered as '{key: value, ...}' with its items in sorted
      order and every key and value rendered recursively by this
      function;
    * anything else is rendered with plain repr().
    """
    if not _py3:
        return repr(v)
    if isinstance(v, str):
        # Drop the leading character so the text reads as a plain
        # (Python 2 style) string literal.
        return repr(s2b(v))[1:]
    if isinstance(v, dict):
        rendered_items = ['%s: %s' % (repr_export(item_key),
                                      repr_export(item_value))
                          for item_key, item_value in sorted(v.items())]
        return '{%s}' % ', '.join(rendered_items)
    return repr(v)
127+
128+
def eval_import(s):
    """Evaluate a Python-2-style value imported from a CSV file.

    Reverses repr_export().  Under Python 2 this is plain eval().  Under
    Python 3, eval() of a Python-2-style string literal yields a str
    holding one character per UTF-8 byte, so each such str is encoded
    as ISO-8859-1 (recovering the original bytes) and then decoded as
    UTF-8.  The conversion is applied to a top-level str and to the str
    keys and values of a dict, including dicts nested as values, which
    matches the recursion repr_export() performs on dicts.  Strings
    held inside lists or tuples are left untouched, because
    repr_export() renders those containers with plain repr().

    s -- the textual representation to evaluate.

    Returns the evaluated value, with strings converted as described.
    Under Python 3 the encode/decode step raises UnicodeError if a
    string is not in the expected one-character-per-UTF-8-byte form;
    eval() may raise for malformed input.
    """
    # NOTE(security): eval() executes arbitrary expressions.  Only pass
    # text that comes from a trusted export file or from the tracker's
    # own database, never untrusted input.
    v = eval(s)
    if not _py3:
        return v

    def py2_to_py3(item):
        # Turn "UTF-8 bytes as characters" strings into real text and
        # walk into dicts so nested values are converted as well.
        if isinstance(item, str):
            return item.encode('iso-8859-1').decode('utf-8')
        if isinstance(item, dict):
            return dict((py2_to_py3(key), py2_to_py3(value))
                        for key, value in item.items())
        return item

    return py2_to_py3(v)

roundup/backends/back_anydbm.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import os, marshal, re, weakref, string, copy, time, shutil, logging
2626

2727
from roundup.anypy.dbm_ import anydbm, whichdb
28-
from roundup.anypy.strings import b2s, bs2b
28+
from roundup.anypy.strings import b2s, bs2b, repr_export, eval_import
2929

3030
from roundup import hyperdb, date, password, roundupdb, security, support
3131
from roundup.backends import locking
@@ -2031,10 +2031,10 @@ def export_list(self, propnames, nodeid):
20312031
value = value.get_tuple()
20322032
elif isinstance(proptype, hyperdb.Password):
20332033
value = str(value)
2034-
l.append(repr(value))
2034+
l.append(repr_export(value))
20352035

20362036
# append retired flag
2037-
l.append(repr(self.is_retired(nodeid)))
2037+
l.append(repr_export(self.is_retired(nodeid)))
20382038

20392039
return l
20402040

@@ -2057,8 +2057,9 @@ def import_list(self, propnames, proplist):
20572057
# Figure the property for this column
20582058
propname = propnames[i]
20592059

2060-
# Use eval to reverse the repr() used to output the CSV
2061-
value = eval(proplist[i])
2060+
# Use eval_import to reverse the repr_export() used to
2061+
# output the CSV
2062+
value = eval_import(proplist[i])
20622063

20632064
# "unmarshal" where necessary
20642065
if propname == 'id':
@@ -2127,8 +2128,9 @@ def export_journals(self):
21272128
value = str(value)
21282129
export_data[propname] = value
21292130
params = export_data
2130-
r.append([repr(nodeid), repr(date), repr(user),
2131-
repr(action), repr(params)])
2131+
r.append([repr_export(nodeid), repr_export(date),
2132+
repr_export(user), repr_export(action),
2133+
repr_export(params)])
21322134
return r
21332135

21342136
class FileClass(hyperdb.FileClass, Class):

roundup/backends/rdbms_common.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969
from roundup.date import Range
7070

7171
from roundup.backends.back_anydbm import compile_expression
72-
from roundup.anypy.strings import b2s, bs2b, us2s
72+
from roundup.anypy.strings import b2s, bs2b, us2s, repr_export, eval_import
7373

7474

7575
# dummy value meaning "argument not passed"
@@ -1302,7 +1302,7 @@ def addjournal(self, classname, nodeid, action, params, creator=None,
13021302
if isinstance(params, type({})):
13031303
self._journal_marshal(params, classname)
13041304

1305-
params = repr(params)
1305+
params = repr_export(params)
13061306

13071307
dc = self.to_sql_value(hyperdb.Date)
13081308
journaldate = dc(journaldate)
@@ -1328,7 +1328,7 @@ def setjournal(self, classname, nodeid, journal):
13281328
# make the journalled data marshallable
13291329
if isinstance(params, type({})):
13301330
self._journal_marshal(params, classname)
1331-
params = repr(params)
1331+
params = repr_export(params)
13321332

13331333
self.save_journal(classname, cols, nodeid, dc(journaldate),
13341334
journaltag, action, params)
@@ -1366,7 +1366,7 @@ def getjournal(self, classname, nodeid):
13661366
res = []
13671367
properties = self.getclass(classname).getprops()
13681368
for nodeid, date_stamp, user, action, params in journal:
1369-
params = eval(params)
1369+
params = eval_import(params)
13701370
if isinstance(params, type({})):
13711371
for param, value in params.items():
13721372
if not value:
@@ -2892,8 +2892,8 @@ def export_list(self, propnames, nodeid):
28922892
value = value.get_tuple()
28932893
elif isinstance(proptype, hyperdb.Password):
28942894
value = str(value)
2895-
l.append(repr(value))
2896-
l.append(repr(self.is_retired(nodeid)))
2895+
l.append(repr_export(value))
2896+
l.append(repr_export(self.is_retired(nodeid)))
28972897
return l
28982898

28992899
def import_list(self, propnames, proplist):
@@ -2914,10 +2914,11 @@ def import_list(self, propnames, proplist):
29142914
if not "id" in propnames:
29152915
newid = self.db.newid(self.classname)
29162916
else:
2917-
newid = eval(proplist[propnames.index("id")])
2917+
newid = eval_import(proplist[propnames.index("id")])
29182918
for i in range(len(propnames)):
2919-
# Use eval to reverse the repr() used to output the CSV
2920-
value = eval(proplist[i])
2919+
# Use eval_import to reverse the repr_export() used to
2920+
# output the CSV
2921+
value = eval_import(proplist[i])
29212922

29222923
# Figure the property for this column
29232924
propname = propnames[i]
@@ -3010,7 +3011,7 @@ def export_journals(self):
30103011
# old tracker with data stored in the create!
30113012
params = {}
30123013
l = [nodeid, date, user, action, params]
3013-
r.append(list(map(repr, l)))
3014+
r.append(list(map(repr_export, l)))
30143015
return r
30153016

30163017
class FileClass(hyperdb.FileClass, Class):

roundup/hyperdb.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from roundup.i18n import _
3232
from roundup.cgi.exceptions import DetectorError
3333
from roundup.anypy.cmp_ import NoneAndDictComparable
34+
from roundup.anypy.strings import eval_import
3435

3536
logger = logging.getLogger('roundup.hyperdb')
3637

@@ -1498,7 +1499,7 @@ def import_journals(self, entries):
14981499
last = 0
14991500
r = []
15001501
for n, l in a:
1501-
nodeid, jdate, user, action, params = map(eval, l)
1502+
nodeid, jdate, user, action, params = map(eval_import, l)
15021503
assert (str(n) == nodeid)
15031504
if n != last:
15041505
if r:

0 commit comments

Comments
 (0)