Skip to content

Commit 97c52a9

Browse files
author
Richard Jones
committed
Added better tokenising to roundup-admin - handles spaces and stuff.
Can use quoting or backslashes. See the roundup.token pydoc.
1 parent a5f4b5c commit 97c52a9

File tree

4 files changed

+196
-7
lines changed

4 files changed

+196
-7
lines changed

roundup-admin

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,17 @@
1616
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1717
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1818
#
19-
# $Id: roundup-admin,v 1.55 2001-12-17 03:52:47 richard Exp $
19+
# $Id: roundup-admin,v 1.56 2001-12-31 05:09:20 richard Exp $
2020

2121
# python version check
2222
from roundup import version_check
2323

24-
import sys, os, getpass, getopt, re, UserDict
24+
import sys, os, getpass, getopt, re, UserDict, shlex
2525
try:
2626
import csv
2727
except ImportError:
2828
csv = None
29-
from roundup import date, hyperdb, roundupdb, init, password
29+
from roundup import date, hyperdb, roundupdb, init, password, token
3030
import roundup.instance
3131

3232
class CommandDict(UserDict.UserDict):
@@ -930,7 +930,7 @@ Command help:
930930
ret = 1
931931
return ret
932932

933-
def interactive(self, ws_re=re.compile(r'\s+')):
933+
def interactive(self):
934934
'''Run in an interactive mode
935935
'''
936936
print 'Roundup {version} ready for input.'
@@ -947,7 +947,7 @@ Command help:
947947
print 'exit...'
948948
break
949949
if not command: continue
950-
args = ws_re.split(command)
950+
args = token.token_split(command)
951951
if not args: continue
952952
if args[0] in ('quit', 'exit'): break
953953
self.run_command(args)
@@ -1000,6 +1000,13 @@ if __name__ == '__main__':
10001000

10011001
#
10021002
# $Log: not supported by cvs2svn $
1003+
# Revision 1.55 2001/12/17 03:52:47 richard
1004+
# Implemented file store rollback. As a bonus, the hyperdb is now capable of
1005+
# storing more than one file per node - if a property name is supplied,
1006+
# the file is called designator.property.
1007+
# I decided not to migrate the existing files stored over to the new naming
1008+
# scheme - the FileClass just doesn't specify the property name.
1009+
#
10031010
# Revision 1.54 2001/12/15 23:09:23 richard
10041011
# Some cleanups in roundup-admin, also made it work again...
10051012
#

roundup/token.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#
2+
# Copyright (c) 2001 Richard Jones.
3+
# This module is free software, and you may redistribute it and/or modify
4+
# under the same terms as Python, so long as this copyright message and
5+
# disclaimer are retained in their original form.
6+
#
7+
# This module is distributed in the hope that it will be useful,
8+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10+
#
11+
# $Id: token.py,v 1.1 2001-12-31 05:09:20 richard Exp $
12+
#
13+
14+
__doc__ = """
15+
This module provides the tokeniser used by roundup-admin.
16+
"""
17+
18+
def token_split(s, whitespace=' \r\n\t', quotes='\'"',
        escaped={'r':'\r', 'n':'\n', 't':'\t'}):
    r'''Split the string up into tokens.

    Tokens are separated by runs of whitespace. An occurrence of a ' or "
    in the input causes the splitter to ignore whitespace until the
    matching quote char is found; the quote chars themselves are not part
    of the token. A non-matching quote char embedded in a quoted section
    is kept as an ordinary character. A backslash inside a quoted section
    is also kept as an ordinary character (no escape processing there).

    Outside of quotes, whitespace, quoting characters and the backslash
    itself may be escaped using a backslash. \r, \n and \t are converted
    to carriage-return, newline and tab. For any other backslashed
    character the backslash is dropped and the character is kept as-is.
    A backslash that is the very last character of the input has nothing
    to escape and is kept as a literal backslash.

    Arguments:
        s          - the string to split
        whitespace - characters that separate tokens
        quotes     - characters that open (and close) a quoted section
        escaped    - mapping of escape letters to their replacement
                     (only read, never modified)

    Returns a list of token strings (empty for blank input).
    Raises ValueError("unmatched quote") if a quoted section is not closed.

    Valid:
        hello world      (2 tokens: hello, world)
        "hello world"    (1 token: hello world)
        "Roch'e" Compaan (2 tokens: Roch'e Compaan)
        Roch\'e Compaan  (2 tokens: Roch'e Compaan)
        address="1 2 3"  (1 token: address=1 2 3)
        \\               (1 token: \)
        \n               (1 token: a newline)
        \o               (1 token: o)
        hello\           (1 token: hello\ - trailing backslash is literal)
    Invalid:
        "hello world     (no matching quote)
        Roch'e Compaan   (no matching quote)
    '''
    NEWTOKEN = 'newtoken'   # between tokens, skipping whitespace
    TOKEN = 'token'         # inside an unquoted part of a token
    QUOTE = 'quote'         # inside a quoted part of a token
    ESCAPE = 'escape'       # just consumed a backslash outside quotes

    tokens = []
    chars = []              # characters of the token being built
    quotechar = ''
    state = NEWTOKEN

    for c in s:
        if state == ESCAPE:
            # escaped-char conversions (t, r, n); anything else stands
            # for itself. Escapes only start outside quotes, so we always
            # resume in TOKEN.
            # TODO: octal, hexdigit
            chars.append(escaped.get(c, c))
            state = TOKEN
        elif state == QUOTE:
            if c == quotechar:
                # matching quote: the token continues until whitespace
                state = TOKEN
            else:
                chars.append(c)
        elif state == NEWTOKEN:
            if c in quotes:
                # quoted token
                state = QUOTE
                quotechar = c
            elif c in whitespace:
                # skip whitespace between tokens
                pass
            elif c == '\\':
                state = ESCAPE
            else:
                # first character of a plain token
                chars.append(c)
                state = TOKEN
        else:
            # state == TOKEN
            if c in whitespace:
                # whitespace terminates the current token
                tokens.append(''.join(chars))
                chars = []
                state = NEWTOKEN
            elif c in quotes:
                # embedded quotes inside a token
                state = QUOTE
                quotechar = c
            elif c == '\\':
                state = ESCAPE
            else:
                chars.append(c)

    # end of string, finish off the current token
    if state == QUOTE:
        raise ValueError("unmatched quote")
    if state == ESCAPE:
        # dangling backslash: keep it rather than silently dropping the
        # whole token that was being built
        chars.append('\\')
        state = TOKEN
    if state == TOKEN:
        tokens.append(''.join(chars))
    return tokens
113+
114+
#
115+
# $Log: not supported by cvs2svn $
116+
#
117+
#
118+
# vim: set filetype=python ts=4 sw=4 et si

test/__init__.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@
1515
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
1616
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
1717
#
18-
# $Id: __init__.py,v 1.7 2001-08-07 00:24:43 richard Exp $
18+
# $Id: __init__.py,v 1.8 2001-12-31 05:09:20 richard Exp $
1919

2020
import unittest
2121

2222
import test_dates, test_schema, test_db, test_multipart, test_mailsplit
23-
import test_init
23+
import test_init, test_token
2424

2525
def go():
2626
suite = unittest.TestSuite((
@@ -30,12 +30,16 @@ def go():
3030
test_init.suite(),
3131
test_multipart.suite(),
3232
test_mailsplit.suite(),
33+
test_token.suite(),
3334
))
3435
runner = unittest.TextTestRunner()
3536
runner.run(suite)
3637

3738
#
3839
# $Log: not supported by cvs2svn $
40+
# Revision 1.7 2001/08/07 00:24:43 richard
41+
# stupid typo
42+
#
3943
# Revision 1.6 2001/08/07 00:15:51 richard
4044
# Added the copyright/license notice to (nearly) all files at request of
4145
# Bizar Software.

test/test_token.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#
2+
# Copyright (c) 2001 Richard Jones
3+
# This module is free software, and you may redistribute it and/or modify
4+
# under the same terms as Python, so long as this copyright message and
5+
# disclaimer are retained in their original form.
6+
#
7+
# This module is distributed in the hope that it will be useful,
8+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
10+
#
11+
# $Id: test_token.py,v 1.1 2001-12-31 05:09:20 richard Exp $
12+
13+
import unittest, time
14+
15+
from roundup.token import token_split
16+
17+
class TokenTestCase(unittest.TestCase):
    '''Checks for roundup.token.token_split on valid and malformed input.'''

    def testValid(self):
        # two plain words separated by a space
        self.assertEqual(token_split('hello world'), ['hello', 'world'])

    def testIgnoreExtraSpace(self):
        # surplus whitespace must not produce empty tokens
        self.assertEqual(token_split('hello world '), ['hello', 'world'])

    def testQuoting(self):
        # either quote style groups the words into one token
        self.assertEqual(token_split('"hello world"'), ['hello world'])
        self.assertEqual(token_split("'hello world'"), ['hello world'])

    def testEmbedQuote(self):
        # a backslashed quote is an ordinary character
        self.assertEqual(token_split(r'Roch\'e Compaan'),
            ["Roch'e", "Compaan"])
        # quotes may start in the middle of a token
        self.assertEqual(token_split('address="1 2 3"'), ['address=1 2 3'])

    def testEscaping(self):
        # a non-matching quote inside a quoted section is kept
        self.assertEqual(token_split('"Roch\'e" Compaan'),
            ["Roch'e", "Compaan"])
        # escaped whitespace does not split the token
        self.assertEqual(token_split(r'hello\ world'), ['hello world'])
        # escaped backslash and the \n conversion
        self.assertEqual(token_split(r'\\'), ['\\'])
        self.assertEqual(token_split(r'\n'), ['\n'])

    def testBadQuote(self):
        # an unclosed quote is an error, wherever it starts
        for text in ('"hello world', "Roch'e Compaan"):
            self.assertRaises(ValueError, token_split, text)
51+
52+
def suite():
    '''Return a test suite holding every "test"-prefixed method of
    TokenTestCase.
    '''
    loader = unittest.TestLoader()
    loader.testMethodPrefix = 'test'
    return loader.loadTestsFromTestCase(TokenTestCase)
54+
55+
56+
#
57+
# $Log: not supported by cvs2svn $
58+
#
59+
#
60+
# vim: set filetype=python ts=4 sw=4 et si

0 commit comments

Comments
 (0)