|
40 | 40 | import sys |
41 | 41 | import time |
42 | 42 |
|
43 | | -version = "0.27" |
| 43 | +version = "0.28" |
44 | 44 | program = os.path.basename(sys.argv[0]) |
45 | 45 | progdir = os.path.dirname(sys.argv[0]) |
46 | 46 |
|
@@ -649,92 +649,107 @@ def dotexp(s): |
649 | 649 | first, last = author.rsplit(" ", 1) |
650 | 650 | else: |
651 | 651 | first, last = author.rsplit(" ", 1) |
| 652 | + if "." in first and not ". " in first: |
| 653 | + first = first.replace(".", ". ").strip() |
| 654 | + |
652 | 655 | prefix_match = re.search(" %(prefix)s$" % aux, first) |
653 | 656 | if prefix_match: |
654 | 657 | prefix = prefix_match.group(1) |
655 | 658 | first = first[:-len(prefix)].strip() |
656 | 659 | last = prefix+" "+last |
657 | 660 | _debug("First, Last: '%s' '%s'" % (first, last)) |
658 | 661 | for firstname, surname, casefixname in [ (first,last,last), (last,first,first), (first,last,last.upper()), (last,first,first.upper()), ]: |
659 | | - author = "%s %s" % (firstname, casefixname) |
660 | | - _debug("\nAuthors: "+str(authors)) |
661 | | - _debug("Author: "+author) |
662 | | - |
663 | | - # Pattern for full author information search, based on first page author name: |
664 | | - authpat = make_authpat(aux['honor'], firstname, casefixname, aux['suffix']) |
665 | | - _debug("Authpat: " + authpat) |
666 | | - start = 0 |
667 | | - col = None |
668 | | - # Find start of author info for this author (if any). |
669 | | - # Scan towards the front from the end of the file, looking for a match to authpath |
670 | | - for j in range(last_line, address_section_pos, -1): |
671 | | - line = self.lines[j] |
672 | | - _debug( "Line: " + line) |
673 | | - forms = [ line ] + [ line.replace(short, longform[short]) for short in longform if short in line ] |
674 | | - for form in forms: |
675 | | - try: |
676 | | - if re.search(authpat, form.strip()) and not j in found_pos: |
677 | | - _debug( "Match") |
678 | | - |
679 | | - start = j |
680 | | - found_pos += [ start ] |
681 | | - _debug( " ==> start %s, normalized '%s'" % (start, form.strip())) |
682 | | - # The author info could be formatted in multiple columns... |
683 | | - columns = re.split("( +| and )", form) |
684 | | - # _debug( "Columns:" + str(columns)) |
685 | | - # Find which column: |
686 | | - # _debug( "Col range:" + str(range(len(columns)))) |
687 | | - |
688 | | - cols = [ c for c in range(len(columns)) if re.search(authpat+r"( and |, |$)", columns[c].strip()) ] |
689 | | - if cols: |
690 | | - col = cols[0] |
691 | | - if not (start, col) in found_pos: |
692 | | - found_pos += [ (start, col) ] |
693 | | - _debug( "Col: %d" % col) |
694 | | - beg = len("".join(columns[:col])) |
695 | | - _debug( "Beg: %d '%s'" % (beg, "".join(columns[:col]))) |
696 | | - _debug( "Len: %d" % len(columns)) |
697 | | - if col == len(columns) or col == len(columns)-1: |
698 | | - end = None |
699 | | - _debug( "End1: %s" % end) |
700 | | - else: |
701 | | - end = beg + len("".join(columns[col:col+2])) |
702 | | - _debug( "End2: %d '%s'" % (end, "".join(columns[col:col+2]))) |
703 | | - _debug( "Cut: '%s'" % form[beg:end]) |
704 | | - author_match = re.search(authpat, columns[col].strip()).group(1) |
705 | | - _debug( "AuthMatch: '%s'" % (author_match,)) |
706 | | - if author_match in companies_seen: |
707 | | - companies[i] = authors[i] |
708 | | - authors[i] = None |
709 | | - else: |
710 | | - if casefixname in author_match: |
711 | | - fullname = author_match.replace(casefixname, surname) |
| 662 | + for left, right in [(firstname, casefixname), (casefixname, firstname)]: |
| 663 | + author = "%s %s" % (left, right) |
| 664 | + _debug("\nAuthors: "+str(authors)) |
| 665 | + _debug("Author: "+author) |
| 666 | + |
| 667 | + # Pattern for full author information search, based on first page author name: |
| 668 | + authpat = make_authpat(aux['honor'], left, right, aux['suffix']) |
| 669 | + _debug("Authpat: " + authpat) |
| 670 | + start = 0 |
| 671 | + col = None |
| 672 | + # Find start of author info for this author (if any). |
| 673 | + # Scan towards the front from the end of the file, looking for a match to authpath |
| 674 | + for j in range(last_line, address_section_pos, -1): |
| 675 | + line = self.lines[j] |
| 676 | + _debug( "Line: " + line) |
| 677 | + forms = [ line ] + [ line.replace(short, longform[short]) for short in longform if short in line ] |
| 678 | + for form in forms: |
| 679 | + try: |
| 680 | + if re.search(authpat, form.strip()) and not j in found_pos: |
| 681 | + _debug( "Match") |
| 682 | + |
| 683 | + start = j |
| 684 | + found_pos += [ start ] |
| 685 | + _debug( " ==> start %s, normalized '%s'" % (start, form.strip())) |
| 686 | + # The author info could be formatted in multiple columns... |
| 687 | + columns = re.split("( +| and )", form) |
| 688 | + # _debug( "Columns:" + str(columns)) |
| 689 | + # Find which column: |
| 690 | + # _debug( "Col range:" + str(range(len(columns)))) |
| 691 | + |
| 692 | + cols = [ c for c in range(len(columns)) if re.search(authpat+r"( and |, |$)", columns[c].strip()) ] |
| 693 | + if cols: |
| 694 | + col = cols[0] |
| 695 | + if not (start, col) in found_pos: |
| 696 | + found_pos += [ (start, col) ] |
| 697 | + _debug( "Col: %d" % col) |
| 698 | + beg = len("".join(columns[:col])) |
| 699 | + _debug( "Beg: %d '%s'" % (beg, "".join(columns[:col]))) |
| 700 | + _debug( "Len: %d" % len(columns)) |
| 701 | + if col == len(columns) or col == len(columns)-1: |
| 702 | + end = None |
| 703 | + _debug( "End1: %s" % end) |
712 | 704 | else: |
713 | | - fullname = author_match |
714 | | - fullname = re.sub(" +", " ", fullname) |
715 | | - given_names, surname = fullname.rsplit(None, 1) |
716 | | - if " " in given_names: |
717 | | - first, middle = given_names.split(None, 1) |
| 705 | + end = beg + len("".join(columns[col:col+2])) |
| 706 | + _debug( "End2: %d '%s'" % (end, "".join(columns[col:col+2]))) |
| 707 | + _debug( "Cut: '%s'" % form[beg:end]) |
| 708 | + author_match = re.search(authpat, columns[col].strip()).group(1) |
| 709 | + _debug( "AuthMatch: '%s'" % (author_match,)) |
| 710 | + if re.search('\(.*\)$', author_match.strip()): |
| 711 | + author_match = author_match.rsplit('(',1)[0].strip() |
| 712 | + if author_match in companies_seen: |
| 713 | + companies[i] = authors[i] |
| 714 | + authors[i] = None |
718 | 715 | else: |
719 | | - first = given_names |
720 | | - middle = None |
721 | | - names = (first, middle, surname, suffix) |
722 | | - if suffix: |
723 | | - fullname = fullname+" "+suffix |
724 | | - if not " ".join([ n for n in names if n ]) == fullname: |
725 | | - _err("Author tuple doesn't match text in draft: %s, %s" % (authors[i], fullname)) |
726 | | - authors[i] = (fullname, first, middle, surname, suffix) |
727 | | - companies[i] = None |
728 | | - #_debug( "Author: %s: %s" % (author_match, authors[author_match])) |
729 | | - break |
730 | | - except AssertionError, e: |
731 | | - sys.stderr.write("filename: "+self.filename+"\n") |
732 | | - sys.stderr.write("authpat: "+authpat+"\n") |
733 | | - raise |
| 716 | + fullname = author_match |
| 717 | + #if casefixname in author_match: |
| 718 | + # fullname = author_match.replace(casefixname, surname) |
| 719 | + #else: |
| 720 | + # fullname = author_match |
| 721 | + fullname = re.sub(" +", " ", fullname) |
| 722 | + if left == firstname: |
| 723 | + given_names, surname = fullname.rsplit(None, 1) |
| 724 | + else: |
| 725 | + surname, given_names = fullname.split(None, 1) |
| 726 | + if " " in given_names: |
| 727 | + first, middle = given_names.split(None, 1) |
| 728 | + else: |
| 729 | + first = given_names |
| 730 | + middle = None |
| 731 | + names = (first, middle, surname, suffix) |
| 732 | + if suffix: |
| 733 | + fullname = fullname+" "+suffix |
| 734 | + parts = [ n for n in names if n ] |
| 735 | + revpt = [ n for n in names if n ] |
| 736 | + revpt.reverse() |
| 737 | + if not ((" ".join(parts) == fullname) or (" ".join(revpt) == fullname)): |
| 738 | + _err("Author tuple doesn't match text in draft: %s, %s" % (authors[i], fullname)) |
| 739 | + authors[i] = (fullname, first, middle, surname, suffix) |
| 740 | + companies[i] = None |
| 741 | + break |
| 742 | + except AssertionError, e: |
| 743 | + sys.stderr.write("filename: "+self.filename+"\n") |
| 744 | + sys.stderr.write("authpat: "+authpat+"\n") |
| 745 | + raise |
| 746 | + if start and col != None: |
| 747 | + break |
734 | 748 | if start and col != None: |
735 | 749 | break |
736 | 750 | if start and col != None: |
737 | 751 | break |
| 752 | + # End for: |
738 | 753 | if not authors[i]: |
739 | 754 | continue |
740 | 755 | _debug("2: authors[%s]: %s" % (i, authors[i])) |
@@ -763,7 +778,7 @@ def dotexp(s): |
763 | 778 | # for a in authors: |
764 | 779 | # if a and a not in companies_seen: |
765 | 780 | # _debug("Search for: %s"%(r"(^|\W)"+re.sub("\.? ", ".* ", a)+"(\W|$)")) |
766 | | - authmatch = [ a for a in authors[i+1:] if a and not a.lower() in companies_seen and (re.search((r"(?i)(^|\W)"+re.sub("\.? ", ".* ", a)+"(\W|$)"), line.strip()) or acronym_match(a, line.strip()) )] |
| 781 | + authmatch = [ a for a in authors[i+1:] if a and not a.lower() in companies_seen and (re.search((r"(?i)(^|\W)"+re.sub("[. ]+", ".* ", a)+"(\W|$)"), line.strip()) or acronym_match(a, line.strip()) )] |
767 | 782 | if authmatch: |
768 | 783 | _debug(" ? Other author or company ? : %s" % authmatch) |
769 | 784 | _debug(" Line: "+line.strip()) |
|
0 commit comments