Skip to content

Commit 1834a41

Browse files
committed
Tweaked the test crawler to put the same information into the log as on screen.
- Legacy-Id: 8642
1 parent 6f82908 commit 1834a41

1 file changed

Lines changed: 17 additions & 16 deletions

File tree

bin/test-crawl

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,17 @@ fh, fn = tempfile.mkstemp(prefix="test-crawl-", suffix=".log", dir="../")
8787
logfile = open(fn, "w")
8888
os.close(fh)
8989

90+
def log(s):
    """Echo the message *s* on screen and record the same line in the log.

    The text is printed and then written to the module-level ``logfile``
    followed by a newline, so the log mirrors what was shown on screen.
    """
    print(s)
    # One write of message plus terminator; same bytes as two separate writes.
    logfile.write(s + '\n')
94+
9095
def get_referrers(url):
    """Return the chain of referring URLs that led to *url*.

    Walks the module-level ``referrers`` mapping from *url* to the page
    that referred to it, then to that page's referrer, and so on, until a
    URL with no recorded referrer is reached. The nearest referrer comes
    first in the returned list; *url* itself is not included unless the
    chain loops back to it.

    If a referrer shows up a second time, the loop is reported through
    ``log`` and the walk stops, returning the chain collected so far.
    """
    chain = []
    current = url
    while current in referrers:
        parent = referrers[current]
        if parent in chain:
            # Seen before: following it again would never terminate.
            log("Circular referral list, discovered at %s" % parent)
            break
        chain.append(parent)
        current = parent
    return chain
@@ -107,13 +112,13 @@ while urls:
107112
r = client.get(url)
108113
elapsed = datetime.datetime.now() - timestamp
109114
except KeyboardInterrupt:
110-
print "was fetching", url
115+
log(" ... was fetching %s" % url)
111116
sys.exit(1)
112117
except:
113-
print 500, "%.3fs" % (datetime.datetime.now() - timestamp).total_seconds(), url, "FAIL (from %s)" % (",\n\t".join(get_referrers(url)))
114-
print "============="
115-
print traceback.format_exc()
116-
print "============="
118+
log("500 %.3fs %s FAIL (from: [ %s ])" % ((datetime.datetime.now() - timestamp).total_seconds(), url, (",\n\t".join(get_referrers(url)))))
119+
log("=============")
120+
log(traceback.format_exc())
121+
log("=============")
117122
errors += 1
118123
else:
119124
tags = []
@@ -136,10 +141,10 @@ while urls:
136141
urls[u] = url
137142
referrers[u] = url
138143
except:
139-
print "error extracting HTML urls from", url
140-
print "============="
141-
print traceback.format_exc()
142-
print "============="
144+
log("error extracting HTML urls from %s" % url)
145+
log("=============")
146+
log(traceback.format_exc())
147+
log("=============")
143148
else:
144149
tags.append(u"FAIL (from %s)" % referrer)
145150
errors += 1
@@ -154,13 +159,9 @@ while urls:
154159
sec = acc_secs % 60
155160

156161
if (len(visited) % 100) == 1:
157-
print ""
158-
print "Elapsed Visited Queue Code Time Url ... Notes"
159-
160-
logentry = "%s %.3fs %s %s" % (r.status_code, elapsed.total_seconds(), url, " ".join(tags))
162+
log("\nElapsed Visited Queue Code Time Url ... Notes")
161163

162-
print "%2d:%02d:%02d"%(hrs,min,sec), "%7d" % len(visited), "%6d" % len(urls), " ", logentry
163-
logfile.write(logentry+"\n")
164+
log("%2d:%02d:%02d %7d %6d %s %.3fs %s %s" % (hrs,min,sec, len(visited), len(urls), r.status_code, elapsed.total_seconds(), url, " ".join(tags)))
164165

165166
logfile.close()
166167
sys.stderr.write("Output written to %s\n\n" % logfile.name)

0 commit comments

Comments
 (0)