Skip to content

Commit 0164eca

Browse files
Richard Jones authored and committed: improved URL matching
1 parent c294c42 commit 0164eca

File tree

3 files changed

+56
-5
lines changed

3 files changed

+56
-5
lines changed

CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed:
1919
- Prevent broken pipe errors in csv export (sf patch #1911449)
2020
- Session API and cleanup thanks anatoly t.
2121
- Make WSGI handler threadsafe (sf #1968027)
22+
- Improved URL matching RE (sf #2038858)
2223

2324

2425
2008-03-01 1.4.4

roundup/cgi/templating.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,9 +1245,19 @@ def is_view_ok(self):
12451245
return self.is_edit_ok()
12461246

12471247
class StringHTMLProperty(HTMLProperty):
1248-
hyper_re = re.compile(r'((?P<url>\w{3,6}://\S+)|'
1249-
r'(?P<email>[-+=%/\w\.]+@[\w\.\-]+)|'
1250-
r'(?P<item>(?P<class>[A-Za-z_]+)(\s*)(?P<id>\d+)))')
1248+
hyper_re = re.compile(r'''(
1249+
(?P<url>
1250+
((ht|f)tp(s?)://|www\.)? # prefix
1251+
([\w]+:\w+@)? # username/password
1252+
(([\w\-]+\.)+([\w]{2,5})) # hostname
1253+
(:[\d]{1,5})? # port
1254+
(/[\w\-$.+!*(),;:@&=?/~\\#%]*)? # path etc.
1255+
)|
1256+
(?P<email>[-+=%/\w\.]+@[\w\.\-]+)|
1257+
(?P<item>(?P<class>[A-Za-z_]+)(\s*)(?P<id>\d+))
1258+
)''', re.X | re.I)
1259+
protocol_re = re.compile('^(ht|f)tp(s?)://', re.I)
1260+
12511261
def _hyper_repl_item(self,match,replacement):
12521262
item = match.group('item')
12531263
cls = match.group('class').lower()
@@ -1263,8 +1273,16 @@ def _hyper_repl_item(self,match,replacement):
12631273

12641274
def _hyper_repl(self, match):
12651275
if match.group('url'):
1266-
s = match.group('url')
1267-
return '<a href="%s">%s</a>'%(s, s)
1276+
u = s = match.group('url')
1277+
if not self.protocol_re.search(s):
1278+
u = 'http://' + s
1279+
# catch an escaped ">" at the end of the URL
1280+
if s.endswith('&gt;'):
1281+
u = s = s[:-4]
1282+
e = '&gt;'
1283+
else:
1284+
e = ''
1285+
return '<a href="%s">%s</a>%s'%(u, s, e)
12681286
elif match.group('email'):
12691287
s = match.group('email')
12701288
return '<a href="mailto:%s">%s</a>'%(s, s)

test/test_templating.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,38 @@ def lookup(key) :
8888
cls = HTMLClass(self.client, "issue")
8989
cls["nosy"]
9090

91+
def test_url_match(self):
92+
'''Test the URL regular expression in StringHTMLProperty.
93+
'''
94+
def t(s, **groups):
95+
m = StringHTMLProperty.hyper_re.search(s)
96+
self.assertNotEquals(m, None, '%r did not match'%s)
97+
d = m.groupdict()
98+
for g in groups:
99+
self.assertEquals(d[g], groups[g], '%s %r != %r in %r'%(g, d[g],
100+
groups[g], s))
101+
102+
#t('123.321.123.321', 'url')
103+
t('http://roundup.net/', url='http://roundup.net/')
104+
t('<HTTP://roundup.net/>', url='HTTP://roundup.net/')
105+
t('www.a.ex', url='www.a.ex')
106+
t('http://a.ex', url='http://a.ex')
107+
t('http://a.ex/?foo&bar=baz\\.@!$%()qwerty',
108+
url='http://a.ex/?foo&bar=baz\\.@!$%()qwerty')
109+
t('www.net', url='www.net')
110+
111+
112+
t('i1', **{'class':'i', 'id':'1'})
113+
t('item123', **{'class':'item', 'id':'123'})
114+
115+
def test_url_replace(self):
116+
p = StringHTMLProperty(self.client, 'test', '1', None, 'test', '')
117+
def t(s): return p.hyper_re.sub(p._hyper_repl, s)
118+
ae = self.assertEquals
119+
ae(t('http://roundup.net/'), '<a href="http://roundup.net/">http://roundup.net/</a>')
120+
ae(t('&lt;HTTP://roundup.net/&gt;'), '&lt;<a href="HTTP://roundup.net/">HTTP://roundup.net/</a>&gt;')
121+
ae(t('&lt;www.roundup.net&gt;'), '&lt;<a href="http://www.roundup.net">www.roundup.net</a>&gt;')
122+
91123
'''
92124
class HTMLPermissions:
93125
def is_edit_ok(self):

0 commit comments

Comments
 (0)