Support markdown2 2.4.10, 2.4.8- and exclude 2.4.9

rouilj · rouilj · commit c6652fe1301e · 2023-07-23T16:50:35.000-04:00
Handle these changes to markdown2

  Version 2.4.9 broke links like [issue1](issue1): raise an error if
     it is used

  Version 2.4.10 changed how filtering of schemes is done: adapt to the
     new method


Mail URLs in markdown are formatted as
[label](mailto:user@something.com). The markdown format wrapper uses
the plain text formatter to turn issue1 and user@something.com into
markdown formatted strings to be htmlized by the markdown formatters.

However, when the plain text formatter saw (mailto:user@something.com)
it turned it into (mailto:&lt;user@something.com&gt;). This is broken, as the
email address shouldn't have the angle brackets. By modifying the email
pattern to include an optional mailto:, all three markdown formatters
do the right thing and I don't end up with href="&lt;user@something.com&gt;"
in the link.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -22,6 +22,9 @@ Fixed:
 - issue2551184 - improve i18n handling. Patch to test to make sure it
   uses the test tracker's locale files and not other locale
   files. (Marcus Priesch)
+- issue2551283 - fail if version 2.4.9 of markdown2 is used, as it is
+  broken. Support version 2.4.10 with its new scheme filtering
+  method and 2.4.8 and earlier. (John Rouillard)
 
 Features:
 
diff --git a/roundup/cgi/templating.py b/roundup/cgi/templating.py
@@ -61,10 +61,49 @@ def _import_markdown2():
         import markdown2
         import re
 
-        class Markdown(markdown2.Markdown):
-            # don't allow disabled protocols in links
-            _safe_protocols = re.compile('(?!' + ':|'.join([
-                re.escape(s) for s in _disable_url_schemes])
+        # Note: version 2.4.9 does not work with Roundup as it breaks
+        # [issue1](issue1) formatted links.
+
+        # Versions 2.4.8 and 2.4.10 use different methods to filter
+        # allowed schemes. 2.4.8 uses a pre-compiled regexp while
+        # 2.4.10 uses a regexp string that it compiles.
+
+        markdown2_vi = markdown2.__version_info__
+        if  markdown2_vi > (2, 4, 9):
+            # Create the filtering regexp.
+            # Allowed default is same as what hyper_re supports.
+
+            # pathed_schemes are terminated with ://
+            pathed_schemes =  [ 'http', 'https', 'ftp', 'ftps' ]
+            # non_pathed are terminated with a :
+            non_pathed_schemes = [ "mailto" ]
+
+            for disabled in _disable_url_schemes:
+                try:
+                    pathed_schemes.remove(disabled)
+                except ValueError:  # if disabled not in list
+                    pass
+                try:
+                    non_pathed_schemes.remove(disabled)
+                except ValueError:
+                    pass
+
+            re_list = []
+            for scheme in pathed_schemes:
+                re_list.append(r'(?:%s)://' % scheme)
+            for scheme in non_pathed_schemes:
+                re_list.append(r'(?:%s):' % scheme)
+
+            enabled_schemes = r"|".join(re_list)
+            class Markdown(markdown2.Markdown):
+                _safe_protocols = enabled_schemes
+        elif markdown2_vi == (2, 4, 9):
+            raise RuntimeError("Unsupported version - markdown2 v2.4.9\n")
+        else:
+            class Markdown(markdown2.Markdown):
+                # don't allow disabled protocols in links
+                _safe_protocols = re.compile('(?!' + ':|'.join([
+                    re.escape(s) for s in _disable_url_schemes])
                                          + ':)', re.IGNORECASE)
 
         def _extras(config):
@@ -1639,7 +1678,7 @@ class StringHTMLProperty(HTMLProperty):
          (:[\d]{1,5})?                     # port
          (/[\w\-$.+!*(),;:@&=?/~\\#%]*)?   # path etc.
         )|
-        (?P<email>[-+=%/\w\.]+@[\w\.\-]+)|
+        (?P<email>(?:mailto:)?[-+=%/\w\.]+@[\w\.\-]+)|
         (?P<item>(?P<class>[A-Za-z_]+)(\s*)(?P<id>\d+)(?P<fragment>\#[^][\#%^{}"<>\s]+)?)
     )''', re.X | re.I)
     protocol_re = re.compile('^(ht|f)tp(s?)://', re.I)
diff --git a/test/test_templating.py b/test/test_templating.py
@@ -11,6 +11,8 @@
 import pytest
 from .pytest_patcher import mark_class
 
+from markdown2 import __version_info__ as md2__version_info__
+
 if ReStructuredText:
     skip_rst = lambda func, *args, **kwargs: func
 else:
@@ -774,51 +776,122 @@ def test_string_markdown(self):
         self.assertEqual(p.markdown().strip(), u2s(u'<p>A string with &lt;br&gt; <em>embedded</em> \u00df</p>'))
 
     def test_string_markdown_link(self):
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A link <http://localhost>'))
-        self.assertEqual(p.markdown().strip(), u2s(u'<p>A link <a href="http://localhost">http://localhost</a></p>'))
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'A link <http://localhost>'))
+        m = p.markdown().strip()
+        m = self.mangleMarkdown2(m)
+
+        self.assertEqual( u2s(u'<p>A link <a href="http://localhost" rel="nofollow noopener">http://localhost</a></p>'), m)
 
     def test_string_markdown_link_item(self):
         """ The link formats for the different markdown engines changes.
             Order of attributes, value for rel (noopener, nofollow etc)
             is different. So most tests check for a substring that indicates
             success rather than the entire returned string.
         """
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'An issue1 link'))
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An issue1 link'))
         self.assertIn( u2s(u'href="issue1"'), p.markdown().strip())
         # just verify that plain linking is working
         self.assertIn( u2s(u'href="issue1"'), p.plain(hyperlink=1))
 
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'An [issue1](issue1) link'))
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issue1](issue1) link'))
         self.assertIn( u2s(u'href="issue1"'), p.markdown().strip())
         # just verify that plain linking is working
         self.assertIn( u2s(u'href="issue1"'), p.plain(hyperlink=1))
 
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'An [issue1](https://example.com/issue1) link'))
-        self.assertIn( u2s(u'href="https://example.com/issue1"'), p.markdown().strip())
+        p = StringHTMLProperty(
+            self.client, 'test', '1', None, 'test',
+            u2s(u'An [issue1](https://example.com/issue1) link'))
+        self.assertIn( u2s(u'href="https://example.com/issue1"'),
+                       p.markdown().strip())
+
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issu1](#example) link'))
+        self.assertIn( u2s(u'href="#example"'), p.markdown().strip())
+
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issu1](/example) link'))
+        self.assertIn( u2s(u'href="/example"'), p.markdown().strip())
+
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issu1](./example) link'))
+        self.assertIn( u2s(u'href="./example"'), p.markdown().strip())
 
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'An [issue1] (https://example.com/issue1) link'))
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issu1](../example) link'))
+        self.assertIn( u2s(u'href="../example"'), p.markdown().strip())
+
+        p = StringHTMLProperty(
+            self.client, 'test', '1', None, 'test',
+            u2s(u'A [wuarchive_ftp](ftp://www.wustl.gov/file) link'))
+        self.assertIn( u2s(u'href="ftp://www.wustl.gov/file"'),
+                       p.markdown().strip())
+
+        p = StringHTMLProperty(
+            self.client, 'test', '1', None, 'test',
+            u2s(u'An [issue1] (https://example.com/issue1) link'))
         self.assertIn( u2s(u'href="issue1"'), p.markdown().strip())
         if type(self) == MistuneTestCase:
             # mistune makes the https url into a real link
-            self.assertIn( u2s(u'href="https://example.com/issue1"'), p.markdown().strip())
+            self.assertIn( u2s(u'href="https://example.com/issue1"'),
+                           p.markdown().strip())
         else:
             # the other two engines leave the parenthesized url as is.
-            self.assertIn( u2s(u' (https://example.com/issue1) link'), p.markdown().strip())
+            self.assertIn( u2s(u' (https://example.com/issue1) link'),
+                           p.markdown().strip())
 
-    def test_string_markdown_link(self):
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issu1](.../example) link'))
+        if (isinstance(self, Markdown2TestCase) and 
+           md2__version_info__ > (2, 4, 9)):
+            # markdown2 > 2.4.9 handles this differently
+            self.assertIn( u2s(u'href="#"'), p.markdown().strip())
+        else:
+            self.assertIn( u2s(u'href=".../example"'), p.markdown().strip())
+            
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'A [phone](tel:0016175555555) link'))
+        if (isinstance(self, Markdown2TestCase) and
+           md2__version_info__ > (2, 4, 9)):
+            self.assertIn(u2s(u'href="#"'), p.markdown().strip())
+        else:
+            self.assertIn( u2s(u'href="tel:0016175555555"'),
+                           p.markdown().strip())
+
+    def test_string_email_markdown_link(self):
         # markdown2 and markdown escape the email address
         try:
             from html import unescape as html_unescape
         except ImportError:
             from HTMLParser import HTMLParser
             html_unescape = HTMLParser().unescape
 
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A link <cmeerw@example.com>'))
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'A link <cmeerw@example.com>'))
         m = html_unescape(p.markdown().strip())
         m = self.mangleMarkdown2(m)
 
         self.assertEqual(m, u2s(u'<p>A link <a href="mailto:cmeerw@example.com">cmeerw@example.com</a></p>'))
 
+        p = StringHTMLProperty(
+            self.client, 'test', '1', None, 'test',
+            u2s(u'An bare email baduser@daemons.com link'))
+        m = self.mangleMarkdown2(html_unescape(p.markdown().strip()))
+        self.assertIn( u2s(u'href="mailto:baduser@daemons.com"'),
+                       m)
+        
+        p = StringHTMLProperty(
+            self.client, 'test', '1', None, 'test',
+            u2s(u'An [email_url](mailto:baduser@daemons.com) link'))
+        m = self.mangleMarkdown2(html_unescape(p.markdown().strip()))
+        
+        if isinstance(self, MistuneTestCase):
+            self.assertIn('<a href="mailto:baduser@daemons.com" rel="nofollow noopener">email_url</a>', m)            
+        else:
+            self.assertIn('<a href="mailto:baduser@daemons.com">email_url</a>', m)
+
     def test_string_markdown_javascript_link(self):
         # make sure we don't get a "javascript:" link
         p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'<javascript:alert(1)>'))
@@ -866,7 +939,7 @@ def test_string_markdown_code_block_attribute(self):
         if type(self) == MistuneTestCase:
             self.assertEqual(m, parser.normalize('<p>embedded code block &lt;pre&gt;</p>\n<pre><code class="lang-python">line 1\nline 2\n</code></pre>\n<p>new &lt;/pre&gt; paragraph</p>'))
         elif type(self) == MarkdownTestCase:
-            self.assertEqual(m, parser.normalize('<p>embedded code block &lt;pre&gt;</p>\n<pre><code class="language-python">line 1\nline 2\n</code></pre>\n<p>new &lt;/pre&gt; paragraph</p>'))
+            self.assertEqual(m.replace('class="python"','class="language-python"'), parser.normalize('<p>embedded code block &lt;pre&gt;</p>\n<pre><code class="language-python">line 1\nline 2\n</code></pre>\n<p>new &lt;/pre&gt; paragraph</p>'))
         else:
             expected_result = parser.normalize('<p>embedded code block &lt;pre&gt;</p>\n<div class="codehilite"><pre><span></span><code><span class="n">line</span> <span class="mi">1</span>\n<span class="n">line</span> <span class="mi">2</span>\n</code></pre></div>\n<p>new &lt;/pre&gt; paragraph</p>')
             self.assertEqual(m, expected_result)