Skip to content

Commit 5bb9518

Browse files
committed
Added some new exceptions to the test-crawler: files which are known not to exist, and files with known HTML character problems.
- Legacy-Id: 13037
1 parent 4f1fbb7 commit 5bb9518

1 file changed

Lines changed: 25 additions & 4 deletions

File tree

bin/test-crawl

Lines changed: 25 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -155,10 +155,19 @@ def check_html_valid(url, response, args):
155155
if not key in validated_urls:
156156
note('Validate: %-32s: %s' % (url[:32], key))
157157
# These URLs have known issues, skip them until those are fixed
158-
if re.search('(/secr|admin/|/doc/.*/edit/info/|rfc542$|rfc776$|draft-leroux-pce-pcecp-interarea-reqs)', url):
159-
log("%s blacklisted; skipping HTML validation" % url)
160-
validated_urls[key] = True
161-
return
158+
for pattern in (
159+
'/secr',
160+
'admin/',
161+
'/doc/.*/edit/info/',
162+
'rfc542$',
163+
'rfc776$',
164+
'draft-leroux-pce-pcecp-interarea-reqs',
165+
'draft-fujiwara-dnsop-resolver-update',
166+
):
167+
if re.search(pattern, url):
168+
validated_urls[key] = True
169+
log("%s blacklisted; skipping HTML validation" % url)
170+
return
162171

163172
if hasattr(response, "content"):
164173
content = response.content
@@ -193,6 +202,7 @@ def skip_extract_from(url):
193202
for pattern in (
194203
r'^/doc/html/[a-z0-9-]+',
195204
r'^/meeting/[a-z0-9-]+/agenda/[a-z0-9-]+',
205+
r'^/static/coverage/',
196206
):
197207
if re.search(pattern, url):
198208
return True
@@ -209,6 +219,17 @@ def skip_url(url):
209219
r"/site/ietfdhcwg/_/rsrc/1311005436000/system/app/css/overlay.css\?cb=simple100%250150goog-ws-left",
210220
r"/dir/tsvdir/reviews/",
211221
r"draft-touch-msword-template-v2\.0",
222+
# These will always 404:
223+
r"^/doc/html/charter-ietf-cicm",
224+
r"^/doc/html/charter-ietf-dcon",
225+
r"^/doc/html/charter-ietf-fun",
226+
r"^/doc/html/charter-ietf-multrans",
227+
r"^/doc/html/charter-ietf-sdn",
228+
r"^/doc/html/charter-ietf-woes",
229+
r"^/doc/html/draft-floyd-cc-alt",
230+
r"^/doc/html/draft-ietf-sipping-overload-design",
231+
r"^/doc/html/status-change-icmpv6-dns-ipv6-to-internet-standard",
232+
r"^/static/coverage/",
212233
):
213234
if re.search(pattern, url):
214235
return True

0 commit comments

Comments
 (0)