Skip to content

Commit f484528

Browse files
committed
Changed test-crawl to avoid unnecessary repetitions of the blacklisting message.
- Legacy-Id: 9933
1 parent: 6a30898 · commit: f484528

1 file changed

Lines changed: 10 additions & 5 deletions

File tree

bin/test-crawl

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22

33
import os, sys, re, datetime, argparse, traceback, tempfile, json, subprocess
44
import html5lib
5-
import debug # pyflakes:ignore
65
import random
76

87
# Set up import path to find our own Django
@@ -42,6 +41,9 @@ import django.test
4241

4342
django.setup()
4443

44+
# This needs to come after we set up sys path to include the local django
45+
import debug # pyflakes:ignore
46+
4547
# prevent memory from leaking when settings.DEBUG=True
4648
from django.db import connection
4749
class DontSaveQueries(object):
@@ -103,10 +105,6 @@ def extract_tastypie_urls(content):
103105

104106
def check_html_valid(url, response, args):
105107
global parser, validated_urls, doc_types, warnings
106-
# These URLs have known issues, skip them until those are fixed
107-
if re.search('(/secr|admin/)|/doc/.*/edit/info/', url):
108-
log("%s blacklisted; skipping HTML validation" % url)
109-
return
110108
key = url
111109
if not args.validate_all:
112110
# derive a key for urls like this by replacing primary keys
@@ -123,6 +121,13 @@ def check_html_valid(url, response, args):
123121
key = re.sub("/%s-.*/"%slug, "/%s-nnnn/"%slug, key)
124122

125123
if not key in validated_urls:
124+
125+
# These URLs have known issues, skip them until those are fixed
126+
if re.search('(/secr|admin/)|/doc/.*/edit/info/', url):
127+
log("%s blacklisted; skipping HTML validation" % url)
128+
validated_urls[key] = True
129+
return
130+
126131
if hasattr(response, "content"):
127132
content = response.content
128133
else:

0 commit comments

Comments
 (0)