Skip to content

Commit de0753f

Browse files
committed
Tweaked the test crawler a bit to skip some slow and meaningless checks.
- Legacy-Id: 11431
1 parent 377a84c commit de0753f

1 file changed

Lines changed: 6 additions & 0 deletions

File tree

bin/test-crawl

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,9 @@ def note(s):
7272
def strip_url(url):
    """Return *url* reduced to the form the crawler uses for comparison.

    Two normalizations are applied, in order:

    1. A leading ``http://testserver`` is removed, leaving the
       server-relative remainder of the URL.
    2. A trailing ``#fragment`` is removed when the fragment consists only
       of lowercase ASCII letters, ``_``, ``.`` and ``-`` and at least one
       character precedes the ``#``.

    URLs matching neither rule are returned unchanged.
    """
    test_server_prefix = "http://testserver"
    if url.startswith(test_server_prefix):
        url = url[len(test_server_prefix):]
    # Only fragments built from [a-z_.-] are stripped; anything else
    # (digits, uppercase, an empty fragment) is left in place on purpose
    # to keep the existing matching behaviour.
    fragment_url = re.search(r"^(.+)#[a-z_.-]+$", url)
    if fragment_url:
        url = fragment_url.group(1)
    return url
7679

7780
def extract_html_urls(content):
@@ -142,6 +145,7 @@ def check_html_valid(url, response, args):
142145
key = re.sub("/submit/status/nnnn/[0-9a-f]+/", "/submit/status/nnnn/bar/", key)
143146
key = re.sub("/team/[a-z0-9-]+/", "/team/foo/", key)
144147
key = re.sub("/wg/[a-z0-9-]+/", "/wg/foo/", key)
148+
key = re.sub("/meeting/[0-9]+/agenda/[0-9a-z]+/", "/meeting/nn/agenda/foo/", key)
145149

146150
for slug in doc_types:
147151
key = re.sub("/%s-.*/"%slug, "/%s-nnnn/"%slug, key)
@@ -189,6 +193,8 @@ def skip_url(url):
189193
for pattern in (
190194
"^/community/[0-9]+/remove_document/",
191195
"^/community/personal/",
196+
# Skip most of the slow pdf composite generation urls
197+
"^/meeting/[0-9]+/agenda/[0-9b-z].*-drafts\\.pdf",
192198
):
193199
if re.search(pattern, url):
194200
return True

0 commit comments

Comments
 (0)