@@ -87,12 +87,17 @@ fh, fn = tempfile.mkstemp(prefix="test-crawl-", suffix=".log", dir="../")
8787logfile = open (fn , "w" )
8888os .close (fh )
8989
90+ def log (s ):
91+ print (s )
92+ logfile .write (s )
93+ logfile .write ('\n ' )
94+
9095def get_referrers (url ):
9196 ref_list = []
9297 while url in referrers :
9398 url = referrers [url ]
9499 if url in ref_list :
95- print ("Circular referral list, discovered at %s" % url )
100+ log ("Circular referral list, discovered at %s" % url )
96101 break
97102 ref_list .append (url )
98103 return ref_list
@@ -107,13 +112,13 @@ while urls:
107112 r = client .get (url )
108113 elapsed = datetime .datetime .now () - timestamp
109114 except KeyboardInterrupt :
110- print " was fetching" , url
115+ log ( " ... was fetching %s" % url )
111116 sys .exit (1 )
112117 except :
113- print 500 , " %.3fs" % (datetime .datetime .now () - timestamp ).total_seconds (), url , "FAIL (from %s)" % ( " ,\n \t " .join (get_referrers (url )))
114- print "============="
115- print traceback .format_exc ()
116- print "============="
118+ log ( " 500 %.3fs %s FAIL (from: [ %s ]) " % (( datetime .datetime .now () - timestamp ).total_seconds (), url , ( " ,\n \t " .join (get_referrers (url )) )))
119+ log ( "=============" )
120+ log ( traceback .format_exc () )
121+ log ( "=============" )
117122 errors += 1
118123 else :
119124 tags = []
@@ -136,10 +141,10 @@ while urls:
136141 urls [u ] = url
137142 referrers [u ] = url
138143 except :
139- print "error extracting HTML urls from" , url
140- print "============="
141- print traceback .format_exc ()
142- print "============="
144+ log ( "error extracting HTML urls from %s" % url )
145+ log ( "=============" )
146+ log ( traceback .format_exc () )
147+ log ( "=============" )
143148 else :
144149 tags .append (u"FAIL (from %s)" % referrer )
145150 errors += 1
@@ -154,13 +159,9 @@ while urls:
154159 sec = acc_secs % 60
155160
156161 if (len (visited ) % 100 ) == 1 :
157- print ""
158- print "Elapsed Visited Queue Code Time Url ... Notes"
159-
160- logentry = "%s %.3fs %s %s" % (r .status_code , elapsed .total_seconds (), url , " " .join (tags ))
162+ log ("\n Elapsed Visited Queue Code Time Url ... Notes" )
161163
162- print "%2d:%02d:%02d" % (hrs ,min ,sec ), "%7d" % len (visited ), "%6d" % len (urls ), " " , logentry
163- logfile .write (logentry + "\n " )
164+ log ("%2d:%02d:%02d %7d %6d %s %.3fs %s %s" % (hrs ,min ,sec , len (visited ), len (urls ), r .status_code , elapsed .total_seconds (), url , " " .join (tags )))
164165
165166logfile .close ()
166167sys .stderr .write ("Output written to %s\n \n " % logfile .name )
0 commit comments