Improve the test-crawler URL-extraction regexp so that it also matches <link> tags, allowing the crawler to find and visit linked feed URLs

OleLaursen · OleLaursen · commit 5dcd140a63a7 · 2014-01-10T17:34:33.000Z
- Legacy-Id: 7104
diff --git a/ietf/bin/test-crawl b/ietf/bin/test-crawl
@@ -33,7 +33,7 @@ def strip_url(url):
     return url
 
 def extract_html_urls(content):
-    for m in re.finditer(r'<a.*href="([^"]+)">', content):
+    for m in re.finditer(r'<(?:a|link) [^>]*href="([^"]+)"', content):
         url = strip_url(m.group(1))
         if len(url) > MAX_URL_LENGTH:
             continue # avoid infinite GET parameter appendages