|
1 | 1 | #!/usr/bin/env python |
2 | 2 |
|
3 | | -import os, sys, re, datetime, argparse, traceback |
| 3 | +import os, sys, re, datetime, argparse, traceback, tempfile |
4 | 4 |
|
5 | 5 | # boilerplate |
6 | 6 | basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")) |
@@ -77,6 +77,12 @@ for url in initial_urls: |
77 | 77 | urls[url] = "[initial]" |
78 | 78 |
|
79 | 79 | errors = 0 |
| 80 | +count = 0 |
| 81 | + |
| 82 | +start_time = datetime.datetime.now() |
| 83 | +fh, fn = tempfile.mkstemp(prefix="test-crawl-", suffix=".log", dir="../") |
| 84 | +logfile = open(fn, "w") |
| 85 | +os.close(fh) |
80 | 86 |
|
81 | 87 | while urls: |
82 | 88 | url, referrer = urls.popitem() |
@@ -126,7 +132,23 @@ while urls: |
126 | 132 | if elapsed.total_seconds() > slow_threshold: |
127 | 133 | tags.append("SLOW") |
128 | 134 |
|
129 | | - print r.status_code, "%.3fs" % elapsed.total_seconds(), url, " ".join(tags) |
| 135 | + acc_time = (timestamp - start_time).total_seconds() |
| 136 | + acc_secs = (timestamp - start_time).total_seconds() |
| 137 | + hrs = acc_secs // (60*60) |
| 138 | + min = (acc_secs % (60*60)) // 60 |
| 139 | + sec = acc_secs % 60 |
| 140 | + |
| 141 | + if (len(visited) % 100) == 1: |
| 142 | + print "" |
| 143 | + print "Elapsed Visited Queue Code Time Url ... Notes" |
| 144 | + |
| 145 | + logentry = "%s %.3fs %s %s" % (r.status_code, elapsed.total_seconds(), url, " ".join(tags)) |
| 146 | + |
| 147 | + print "%2d:%02d:%02d"%(hrs,min,sec), "%7d" % len(visited), "%6d" % len(urls), " ", logentry |
| 148 | + logfile.write(logentry+"\n") |
| 149 | + |
| 150 | +logfile.close() |
| 151 | +sys.stderr.write("Output written to %s" % logfile.name) |
130 | 152 |
|
131 | 153 | if errors > 0: |
132 | 154 | sys.stderr.write("Found %s errors, grep output for FAIL for details\n" % errors) |
|
0 commit comments