@@ -72,6 +72,9 @@ def note(s):
def strip_url(url):
    """Return *url* without the test-server origin and without a trailing fragment.

    A leading ``http://testserver`` is removed, leaving the remainder of the
    URL.  A trailing ``#fragment`` is then dropped, but only when the fragment
    consists solely of lowercase ASCII letters, ``_``, ``.`` and ``-`` and is
    preceded by at least one character; any other URL is returned as is.
    """
    origin = "http://testserver"
    if url.startswith(origin):
        url = url[len(origin):]

    # Greedy prefix: when several '#' are present only the last fragment is cut.
    with_fragment = re.search("^(.+)#[a-z_.-]+$", url)
    return with_fragment.group(1) if with_fragment else url
7679
7780def extract_html_urls (content ):
@@ -142,6 +145,7 @@ def check_html_valid(url, response, args):
142145 key = re .sub ("/submit/status/nnnn/[0-9a-f]+/" , "/submit/status/nnnn/bar/" , key )
143146 key = re .sub ("/team/[a-z0-9-]+/" , "/team/foo/" , key )
144147 key = re .sub ("/wg/[a-z0-9-]+/" , "/wg/foo/" , key )
148+ key = re .sub ("/meeting/[0-9]+/agenda/[0-9a-z]+/" , "/meeting/nn/agenda/foo/" , key )
145149
146150 for slug in doc_types :
147151 key = re .sub ("/%s-.*/" % slug , "/%s-nnnn/" % slug , key )
@@ -189,6 +193,8 @@ def skip_url(url):
189193 for pattern in (
190194 "^/community/[0-9]+/remove_document/" ,
191195 "^/community/personal/" ,
196+ # Skip most of the slow pdf composite generation urls
197+ "^/meeting/[0-9]+/agenda/[0-9b-z].*-drafts\\ .pdf" ,
192198 ):
193199 if re .search (pattern , url ):
194200 return True
0 commit comments