Skip to content

Commit 3266670

Browse files
committed
don't allow javascript URLs in markdown content
limit auto-linkification in markdown content to issue links to avoid interference with markdown link syntax
1 parent ed020fd commit 3266670

File tree

2 files changed

+54
-21
lines changed

2 files changed

+54
-21
lines changed

roundup/cgi/templating.py

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
__docformat__ = 'restructuredtext'
2121

22+
# List of schemes that are not rendered as links in rst and markdown.
23+
_disable_url_schemes = [ 'javascript' ]
2224

2325
import base64, cgi, re, os.path, mimetypes, csv, string
2426
import calendar
@@ -62,8 +64,8 @@ def _import_markdown2():
6264
try:
6365
import markdown2, re
6466
class Markdown(markdown2.Markdown):
65-
# don't restrict protocols in links
66-
_safe_protocols = re.compile('', re.IGNORECASE)
67+
# don't allow disabled protocols in links
68+
_safe_protocols = re.compile('(?!' + ':|'.join([re.escape(s) for s in _disable_url_schemes]) + ':)', re.IGNORECASE)
6769

6870
markdown = lambda s: Markdown(safe_mode='escape', extras={ 'fenced-code-blocks' : True }).convert(s)
6971
except ImportError:
@@ -75,9 +77,19 @@ def _import_markdown():
7577
try:
7678
from markdown import markdown as markdown_impl
7779
from markdown.extensions import Extension as MarkdownExtension
78-
79-
# make sure any HTML tags get escaped
80-
class EscapeHtml(MarkdownExtension):
80+
from markdown.treeprocessors import Treeprocessor
81+
82+
class RestrictLinksProcessor(Treeprocessor):
83+
def run(self, root):
84+
for el in root.iter('a'):
85+
if 'href' in el.attrib:
86+
url = el.attrib['href'].lstrip(' \r\n\t\x1a\0').lower()
87+
for s in _disable_url_schemes:
88+
if url.startswith(s + ':'):
89+
el.attrib['href'] = '#'
90+
91+
# make sure any HTML tags get escaped and some links restricted
92+
class SafeHtml(MarkdownExtension):
8193
def extendMarkdown(self, md, md_globals=None):
8294
if hasattr(md.preprocessors, 'deregister'):
8395
md.preprocessors.deregister('html_block')
@@ -88,15 +100,22 @@ def extendMarkdown(self, md, md_globals=None):
88100
else:
89101
del md.inlinePatterns['html']
90102

91-
markdown = lambda s: markdown_impl(s, extensions=[EscapeHtml(), 'fenced_code'])
103+
if hasattr(md.preprocessors, 'register'):
104+
md.treeprocessors.register(RestrictLinksProcessor(), 'restrict_links', 0)
105+
else:
106+
md.treeprocessors['restrict_links'] = RestrictLinksProcessor()
107+
108+
markdown = lambda s: markdown_impl(s, extensions=[SafeHtml(), 'fenced_code'])
92109
except ImportError:
93110
markdown = None
94111

95112
return markdown
96113

97114
def _import_mistune():
98115
try:
99-
from mistune import markdown
116+
import mistune
117+
mistune._scheme_blacklist = [ s + ':' for s in _disable_url_schemes ]
118+
markdown = mistune.markdown
100119
except ImportError:
101120
markdown = None
102121

@@ -1499,10 +1518,6 @@ class StringHTMLProperty(HTMLProperty):
14991518
'raw_enabled': 0,
15001519
'_disable_config': 1}
15011520

1502-
# List of schemes that are not rendered as links in rst.
1503-
# Could also be used to disable links for other processors:
1504-
# e.g. stext or markdown. If we can figure out how to do it.
1505-
disable_schemes = [ 'javascript' ]
15061521
valid_schemes = { }
15071522

15081523
def _hyper_repl(self, match):
@@ -1570,13 +1585,7 @@ def _hyper_repl_rst(self, match):
15701585
return match.group(0)
15711586

15721587
def _hyper_repl_markdown(self, match):
1573-
if match.group('url'):
1574-
s = match.group('url')
1575-
return '[%s](%s)'%(s, s)
1576-
elif match.group('email'):
1577-
s = match.group('email')
1578-
return '[%s](mailto:%s)'%(s, s)
1579-
elif len(match.group('id')) < 10:
1588+
if match.group('id') and len(match.group('id')) < 10:
15801589
return self._hyper_repl_item(match,'[%(item)s](%(cls)s%(id)s)')
15811590
else:
15821591
# just return the matched text
@@ -1670,7 +1679,7 @@ def rst(self, hyperlink=1):
16701679

16711680
# disable javascript and possibly other url schemes from working
16721681
from docutils.utils.urischemes import schemes
1673-
for sch in self.disable_schemes:
1682+
for sch in _disable_url_schemes:
16741683
# I catch KeyError but reraise if scheme didn't exist.
16751684
# Safer to fail if a disabled scheme isn't found. It may
16761685
# be a typo that keeps a bad scheme enabled. But this

test/test_templating.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -421,8 +421,32 @@ def test_input_xhtml(self):
421421
# common markdown test cases
422422
class MarkdownTests:
423423
def test_string_markdown(self):
424-
p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A string http://localhost with [email protected] <br> *embedded* \u00df'))
425-
self.assertEqual(p.markdown().strip(), u2s(u'<p>A string <a href="http://localhost">http://localhost</a> with <a href="mailto:[email protected]">[email protected]</a> &lt;br&gt; <em>embedded</em> \u00df</p>'))
424+
p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A string with <br> *embedded* \u00df'))
425+
self.assertEqual(p.markdown().strip(), u2s(u'<p>A string with &lt;br&gt; <em>embedded</em> \u00df</p>'))
426+
427+
def test_string_markdown_link(self):
428+
p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A link <http://localhost>'))
429+
self.assertEqual(p.markdown().strip(), u2s(u'<p>A link <a href="http://localhost">http://localhost</a></p>'))
430+
431+
def test_string_markdown_link(self):
432+
# markdown2 and markdown
433+
try:
434+
import html
435+
html_unescape = html.unescape
436+
except AttributeError:
437+
from HTMLParser import HTMLParser
438+
html_unescape = HTMLParser().unescape
439+
440+
p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A link <[email protected]>'))
441+
self.assertEqual(html_unescape(p.markdown().strip()), u2s(u'<p>A link <a href="mailto:[email protected]">[email protected]</a></p>'))
442+
443+
def test_string_markdown_javascript_link(self):
444+
# make sure we don't get a "javascript:" link
445+
p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'<javascript:alert(1)>'))
446+
self.assertTrue(p.markdown().find('href="javascript:') == -1)
447+
448+
p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'[link](javascript:alert(1))'))
449+
self.assertTrue(p.markdown().find('href="javascript:') == -1)
426450

427451
def test_string_markdown_code_block(self):
428452
p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'embedded code block\n\n```\nline 1\nline 2\n```\n\nnew paragraph'))

0 commit comments

Comments
 (0)