|
17 | 17 | from ietf.doc.utils import get_tags_for_stream_id |
18 | 18 | from ietf.group.models import * |
19 | 19 | from ietf.name.models import * |
| 20 | +from ietf.person.models import * |
| 21 | +from ietf.person.name import name_parts |
20 | 22 | from redesign.importing.utils import old_person_to_person, person_name, dont_save_queries |
21 | 23 | from ietf.name.utils import name |
22 | 24 | from ietf.idtracker.models import InternetDraft, IDInternal, IESGLogin, DocumentComment, PersonOrOrgInfo, Rfc, IESGComment, IESGDiscuss, BallotInfo, Position |
|
27 | 29 |
|
28 | 30 | from workflows.models import State as StateOld |
29 | 31 |
|
| 32 | +ALL_IDS_STATE = "all_ids.state" |
| 33 | + |
30 | 34 | import_docs_from = document_name_to_import = document_id_to_import = None |
31 | 35 | if len(sys.argv) > 1: |
32 | 36 | if re.search("^\d+$", sys.argv[1]): |
@@ -175,6 +179,85 @@ def alias_doc(name, doc): |
175 | 179 |
|
176 | 180 |
|
177 | 181 | # helpers |
def extract_authors_from_dump():
    """Parse per-draft author lists out of the ALL_IDS_STATE dump file.

    Every non-comment line of the dump is searched for a
    ``docauthors='<authors>;<date-like suffix>'`` attribute.  The author
    string is split on ", " and each entry is expected to look like
    ``Name <email>``.  The second space-separated field of the line is
    used as the draft name.

    A draft is only included in the result when *all* of its authors look
    reliable: each one must have an email containing "@", and a cleaned-up
    name that neither contains "VCARD" nor has more than five words.  A
    single unreliable author drops the entire draft, so callers can fall
    back to another author source for it.

    Returns:
        dict mapping draft name -> list of (name, email) tuples in the
        order they appear in the dump.  The dict is empty when the dump
        file does not exist (a warning is printed in that case).
    """
    authors_re = re.compile(r"docauthors='([^']*);....-..-..'")
    name_email_re = re.compile(r"(.*) <([^>]+)>")
    email_brackets_re = re.compile(r" <[^>]*>")
    comma_re = re.compile(r".*,")
    colon_re = re.compile(r".*:")

    # Addresses found in the dump that should be replaced by the address
    # on the right-hand side before being stored.
    email_mapping = {
        "barryleiba@computer.org": "barryleiba@gmail.com",
        "greg.daley@eng.monash.edu.au": "gdaley@netstarnetworks.com",
        "radia.perlman@sun.com": "radia@alum.mit.edu",
        "lisa@osafoundation.org": "lisa.dusseault@gmail.com",
        "lisa.dusseault@messagingarchitects.com": "lisa.dusseault@gmail.com",
        "scott.lawrence@nortel.com": "scottlawrenc@avaya.com",
        "charliep@computer.org": "charliep@computer.org, charles.perkins@earthlink.net",
        "yaronf@checkpoint.com": "yaronf.ietf@gmail.com",
        "mary.barnes@nortel.com": "mary.ietf.barnes@gmail.com",
        "scottlawrenc@avaya.com": "xmlscott@gmail.com",
        "henk@ripe.net": "henk@uijterwaal.nl",
        "jonne.soininen@nsn.com": "jonne.soininen@renesasmobile.com",
        "tom.taylor@rogers.com": "tom.taylor.stds@gmail.com",
        "rahul@juniper.net": "raggarwa_1@yahoo.com",
        "dward@juniper.net": "dward@cisco.com",
        "alan.ford@roke.co.uk": "alanford@cisco.com",
    }

    res = {}

    if not os.path.exists(ALL_IDS_STATE):
        # Parenthesised single-string form prints identically under the
        # Python 2 print statement.
        print("WARNING: proceeding without author information in all_ids.state")
        return res

    with open(ALL_IDS_STATE, "r") as author_source:
        for line in author_source:
            if line.startswith("#"):
                continue

            m = authors_re.search(line)
            if not m:
                continue

            # Only look at the draft name once we know the line carries
            # author data, and never index past a short/blank line.
            fields = line.split(" ")
            if len(fields) < 2:
                continue
            draft_name = fields[1]

            # Undo the quote escaping used in the dump, then decode the
            # raw bytes to unicode before splitting into authors.
            raw_authors = m.group(1).replace("\\x27", "'").replace("\\'", "'").decode("latin-1")

            authors = []
            for entry in raw_authors.split(", "):
                n = name_email_re.match(entry)
                if n:
                    author_name = n.group(1)
                    email = n.group(2)
                else:
                    author_name = entry
                    email = ""

                # An empty email also fails this check.
                if "@" not in email:
                    break

                # Strip leftover "<...>" parts and anything up to the
                # last comma / colon, keeping only the trailing name.
                author_name = email_brackets_re.sub("", author_name)
                author_name = comma_re.sub("", author_name)
                author_name = colon_re.sub("", author_name)
                author_name = author_name.strip()

                if "VCARD" in author_name or len(author_name.split()) > 5:
                    break

                authors.append((author_name, email_mapping.get(email, email)))
            else:
                # Loop finished without hitting an unreliable author.
                res[draft_name] = authors

    return res
| 258 | + |
| 259 | +author_dump = extract_authors_from_dump() |
| 260 | + |
178 | 261 | def save_docevent(doc, event, comment): |
179 | 262 | event.time = comment.datetime() |
180 | 263 | event.by = iesg_login_to_person(comment.created_by) |
@@ -885,13 +968,45 @@ def import_from_idinternal(d, idinternal): |
885 | 968 |
|
886 | 969 | # authors |
887 | 970 | d.authors.clear() |
888 | | - for i, a in enumerate(o.authors.all().select_related("person").order_by('author_order', 'person')): |
889 | | - try: |
890 | | - e = Email.objects.get(address__iexact=a.email() or a.person.email()[1] or u"unknown-email-%s" % person_name(a.person).replace(" ", "-")) |
891 | | - # renumber since old numbers may be a bit borked |
| 971 | + |
| 972 | + authors_from_dump = author_dump.get(d.name) |
| 973 | + if authors_from_dump: |
| 974 | + for i, a in enumerate(authors_from_dump): |
| 975 | + name, email = a |
| 976 | + try: |
| 977 | + e = Email.objects.get(address__iexact=email) |
| 978 | + except Email.DoesNotExist: |
| 979 | + e = Email(address=email) |
| 980 | + |
| 981 | + ps = Person.objects.filter(alias__name=name) |
| 982 | + if ps: |
| 983 | + p = ps[0] |
| 984 | + else: |
| 985 | + _, first, _, last, _ = name_parts(name) |
| 986 | + first = first.replace(".", "") |
| 987 | + |
| 988 | + ps = Person.objects.filter(name__regex=u".*%s.*%s.*" % (first, last)) |
| 989 | + if len(ps) == 1: |
| 990 | + p = ps[0] |
| 991 | + else: |
| 992 | + from ietf.utils import unaccent |
| 993 | + p = Person.objects.create(name=name, ascii=unaccent.asciify(name)) |
| 994 | + Alias.objects.create(name=p.name, person=p) |
| 995 | + if p.ascii != p.name: |
| 996 | + Alias.objects.create(name=p.ascii, person=p) |
| 997 | + |
| 998 | + e.person = p |
| 999 | + e.save() |
| 1000 | + |
892 | 1001 | DocumentAuthor.objects.create(document=d, author=e, order=i) |
893 | | - except Email.DoesNotExist: |
894 | | - print "SKIPPED author", unicode(a.person).encode('utf-8') |
| 1002 | + else: |
| 1003 | + for i, a in enumerate(o.authors.all().select_related("person").order_by('author_order', 'person')): |
| 1004 | + try: |
| 1005 | + e = Email.objects.get(address__iexact=a.email() or a.person.email()[1] or u"unknown-email-%s" % person_name(a.person).replace(" ", "-")) |
| 1006 | + # renumber since old numbers may be a bit borked |
| 1007 | + DocumentAuthor.objects.create(document=d, author=e, order=i) |
| 1008 | + except Email.DoesNotExist: |
| 1009 | + print "SKIPPED author", unicode(a.person).encode('utf-8') |
895 | 1010 |
|
896 | 1011 | # clear any already imported events |
897 | 1012 | d.docevent_set.all().delete() |
|
0 commit comments