@@ -450,8 +450,7 @@ def extract_authors(self):
450450 r"(?:, | )([Ee]d\.?|\([Ee]d\.?\)|[Ee]ditor)$" ,
451451 ]
452452 companyformats = [
453- r" {6}(([A-Za-z'][-A-Za-z0-9.& ']+)(,? ?Inc\.?))$" ,
454- r" {6}(([A-Za-z'][-A-Za-z0-9.& ']+)(,? ?Ltd\.?))$" ,
453+ r" {6}(([A-Za-z'][-A-Za-z0-9.& ']+)(,? ?(Inc|Ltd|AB|S\.A)\.?))$" ,
455454 r" {6}(([A-Za-z'][-A-Za-z0-9.& ']+)(/([A-Za-z'][-A-Za-z0-9.& ']+))+)$" ,
456455 r" {6}([a-z0-9.-]+)$" ,
457456 r" {6}(([A-Za-z'][-A-Za-z0-9.&']+)( [A-Za-z'][-A-Za-z0-9.&']+)*)$" ,
@@ -503,6 +502,7 @@ def dotexp(s):
503502 for line in self .lines [:30 ]:
504503 self ._docheader += line + "\n "
505504 author_on_line = False
505+ company_on_line = False
506506 _debug ( "**" + line )
507507 leading_space = len (re .findall ("^ *" , line )[0 ])
508508 line_len = len (line .rstrip ())
@@ -526,7 +526,7 @@ def dotexp(s):
526526 for lineformat , authformat in multiauthformats :
527527 match = re .search (lineformat , line )
528528 if match :
529- _debug ("Multiauth format: '%s'" % lineformat )
529+ _debug ("a. Multiauth format: '%s'" % lineformat )
530530 author_list = re .findall (authformat , line )
531531 authors += [ a [0 ] for a in author_list ]
532532 companies += [ None for a in author_list ]
@@ -540,22 +540,28 @@ def dotexp(s):
540540 for lineformat in authcompanyformats :
541541 match = re .search (lineformat , line )
542542 if match :
543- _debug ("Line format: '%s'" % lineformat )
544- author = match .group ("author" )
545- company = match .group ("company" )
546- authors += [ author , '' ]
547- companies += [ None , company ]
548- #_debug("\nLine: " + line)
549- #_debug("Format: " + authformat)
550- _debug ("Author: '%s'" % author )
551- _debug ("Company: '%s'" % company )
552- author_on_line = True
553- break
543+ _debug ("b. Line format: '%s'" % lineformat )
544+ maybe_company = match .group ("company" ).strip (" ,." )
545+ # is the putative company name just a partial name, i.e., a part
546+ # that commonly occurs after a comma as part of a company name,
547+ # as in "Foo Bar, Inc."? If so, skip; else assume there's a
548+ # company name after the comma.
549+ if not maybe_company in ["Inc" , "Ltd" , "S.A" , "AG" , "AB" , "N.V" , ]:
550+ author = match .group ("author" )
551+ company = match .group ("company" )
552+ authors += [ author , '' ]
553+ companies += [ None , company ]
554+ #_debug("\nLine: " + line)
555+ #_debug("Format: " + authformat)
556+ _debug ("Author: '%s'" % author )
557+ _debug ("Company: '%s'" % company )
558+ author_on_line = True
559+ break
554560 if not author_on_line :
555561 for authformat in authformats :
556562 match = re .search (authformat , line )
557563 if match :
558- _debug ("Auth format: '%s'" % authformat )
564+ _debug ("c. Auth format: '%s'" % authformat )
559565 author = match .group (1 )
560566 authors += [ author ]
561567 companies += [ None ]
@@ -568,10 +574,11 @@ def dotexp(s):
568574 for authformat in companyformats :
569575 match = re .search (authformat , line )
570576 if match :
571- _debug ("Auth format: '%s'" % authformat )
577+ _debug ("d. Company format: '%s'" % authformat )
572578 company = match .group (1 )
573579 authors += [ "" ]
574580 companies += [ company ]
581+ company_on_line = True
575582 #_debug("\nLine: " + line)
576583 #_debug("Format: " + authformat)
577584 _debug ("Company: '%s'" % company )
@@ -582,7 +589,7 @@ def dotexp(s):
582589 companies += [ "" ]
583590 if line .strip () == "" :
584591 if prev_blankline and authors :
585- _debug ("Breaking for having found consecutive blank lines after author name" )
592+ _debug ("Breaking, having found consecutive blank lines after author name" )
586593 break
587594 if authors :
588595 have_blankline = True
@@ -592,7 +599,7 @@ def dotexp(s):
592599 if "draft-" in line :
593600 have_draftline = True
594601 if have_blankline and have_draftline :
595- _debug ("Breaking for having found both blank line and draft-name line" )
602+ _debug ("Breaking, having found both blank line and draft-name line" )
596603 break
597604
598605 # remove trailing blank entries in the author list:
@@ -607,6 +614,8 @@ def dotexp(s):
607614 #companies = [ None if a else '' for a in authors ]
608615 #_debug("B:companies : %s" % str(companies))
609616 #find authors' addresses section if it exists
617+ _debug ("B:authors : %s" % str (authors ))
618+
610619 last_line = len (self .lines )- 1
611620 address_section_pos = last_line / 2
612621 for i in range (last_line / 2 ,last_line ):
@@ -990,7 +999,12 @@ def _output(docname, fields, outfile=sys.stdout):
990999 else :
9911000 if opt_attributes :
9921001 def outputkey (key , fields ):
993- outfile .write ("%-24s: %s\n " % ( key , fields [key ].strip ().replace ("\\ " , "\\ \\ " ).replace ("'" , "\\ x27" )))
1002+ field = fields [key ]
1003+ if "\n " in field :
1004+ field = "\n " + field .rstrip ()
1005+ else :
1006+ field = field .strip ()
1007+ outfile .write ("%-24s: %s\n " % ( key , field .replace ("\\ " , "\\ \\ " ).replace ("'" , "\\ x27" )))
9941008 else :
9951009 def outputkey (key , fields ):
9961010 outfile .write (" %s='%s'" % ( key .lower (), fields [key ].strip ().replace ("\\ " , "\\ \\ " ).replace ("'" , "\\ x27" ).replace ("\n " , "\\ n" )))
0 commit comments