@@ -23,6 +23,8 @@ parser.add_argument('--slow', dest='slow_threshold', type=float, default=1.0,
2323parser .add_argument ('--settings' , help = 'Custom settings file' )
2424parser .add_argument ('--logfile' , help = 'Write to logfile' )
2525parser .add_argument ('--user' , help = 'Crawl logged in as this user' , default = None )
26+ parser .add_argument ('--no-follow' , dest = 'follow' , action = 'store_false' , default = True ,
27+ help = 'Do not follow URLs found in fetched pages, just check the given URLs' )
2628parser .add_argument ('--validator-nu' , dest = 'validator_nu' , action = 'store_true' ,
2729 help = 'Use validator.nu instead of html5lib for HTML validation' )
2830parser .add_argument ('--pedantic' , action = 'store_true' ,
@@ -384,7 +386,7 @@ if __name__ == "__main__":
384386
385387 if ctype == "text/html" :
386388 try :
387- if not skip_extract_from (url ):
389+ if args . follow and not skip_extract_from (url ):
388390 for u in extract_html_urls (r .content ):
389391 if u not in visited and u not in urls :
390392 urls [u ] = url
@@ -400,10 +402,11 @@ if __name__ == "__main__":
400402
401403 elif ctype == "application/json" :
402404 try :
403- for u in extract_tastypie_urls (r .content ):
404- if u not in visited and u not in urls :
405- urls [u ] = url
406- referrers [u ] = url
405+ if args .follow :
406+ for u in extract_tastypie_urls (r .content ):
407+ if u not in visited and u not in urls :
408+ urls [u ] = url
409+ referrers [u ] = url
407410 except :
408411 log ("error extracting urls from %s" % url )
409412 log ("=============" )
0 commit comments