Skip to content

Commit 3d48650

Browse files
committed
Another test-crawler tweak.
- Legacy-Id: 11433
1 parent f49471b commit 3d48650

1 file changed

Lines changed: 5 additions & 4 deletions

File tree

bin/test-crawl

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -121,11 +121,10 @@ def check_html_valid(url, response, args):
121121
key = url
122122
if not args.validate_all:
123123
# derive a key for urls like this by replacing primary keys
124-
key = re.sub("\?.*$", "", key)
125124
key = re.sub("#.*$", "", key)
126125
key = re.sub("/.+@.+/", "/x@x.org/", key)
127-
key = re.sub("/[0-9.]+/", "/nnnn/", key)
128126
key = re.sub("/[0-9.]+/", "/mmmm/", key)
127+
key = re.sub("/[0-9.]+/", "/nnnn/", key)
129128
key = re.sub("/ag/[a-z0-9-]+/", "/ag/foo/", key)
130129
key = re.sub("/area/[a-z0-9-]+/", "/area/foo/", key)
131130
key = re.sub("/bcp[0-9]+/", "/bcpnnn/", key)
@@ -134,6 +133,7 @@ def check_html_valid(url, response, args):
134133
key = re.sub("/draft-[a-z0-9-]+/", "/draft-foo/", key)
135134
key = re.sub("/group/[a-z0-9-]+/", "/group/foo/", key)
136135
key = re.sub("/ipr/search/.*", "/ipr/search/", key)
136+
key = re.sub("/meeting/[0-9]+/agenda/[0-9a-z]+/", "/meeting/nn/agenda/foo/", key)
137137
key = re.sub("/release/[0-9dev.]+/", "/release/n.n.n/", key)
138138
key = re.sub("/rfc[0-9]+/", "/rfcnnnn/", key)
139139
key = re.sub("/rg/[a-z0-9-]+/", "/rg/foo/", key)
@@ -145,7 +145,7 @@ def check_html_valid(url, response, args):
145145
key = re.sub("/submit/status/nnnn/[0-9a-f]+/", "/submit/status/nnnn/bar/", key)
146146
key = re.sub("/team/[a-z0-9-]+/", "/team/foo/", key)
147147
key = re.sub("/wg/[a-z0-9-]+/", "/wg/foo/", key)
148-
key = re.sub("/meeting/[0-9]+/agenda/[0-9a-z]+/", "/meeting/nn/agenda/foo/", key)
148+
key = re.sub("\?.*$", "", key)
149149

150150
for slug in doc_types:
151151
key = re.sub("/%s-.*/"%slug, "/%s-nnnn/"%slug, key)
@@ -193,8 +193,9 @@ def skip_url(url):
193193
for pattern in (
194194
"^/community/[0-9]+/remove_document/",
195195
"^/community/personal/",
196-
# Skip most of the slow pdf composite generation urls
196+
# Skip most of the slow pdf composite generation urls and svg urls
197197
"^/meeting/[0-9]+/agenda/[0-9b-z].*-drafts\\.pdf",
198+
"^/wg/[a-z0-9-]+/deps/svg/",
198199
):
199200
if re.search(pattern, url):
200201
return True

0 commit comments

Comments
 (0)