We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 1b36eec, commit 948804f. Copy full SHA for 948804f
1 file changed
bin/test-crawl
@@ -65,7 +65,7 @@ def strip_url(url):
65
return url
66
67
def extract_html_urls(content):
68
- for m in re.finditer(r'(<(?:a|link) [^>]*href=[\'"]([^"]+)[\'"][^>]*>)', content):
+ for m in re.finditer(r'(<(?:(?:a|link) [^>]*href|(?:img|script) [^>]*src)=[\'"]([^"]+)[\'"][^>]*>)', content):
69
if re.search(r'rel=["\']?nofollow["\']', m.group(1)):
70
continue
71
0 commit comments