Skip to content

Commit 1ee160d

Browse files
committed
Import authors from all_ids.state when the draft author information in
there appears to be in good shape - Legacy-Id: 3929
1 parent 9c06587 commit 1ee160d

1 file changed

Lines changed: 121 additions & 6 deletions

File tree

redesign/importing/import-docs.py

Lines changed: 121 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from ietf.doc.utils import get_tags_for_stream_id
1818
from ietf.group.models import *
1919
from ietf.name.models import *
20+
from ietf.person.models import *
21+
from ietf.person.name import name_parts
2022
from redesign.importing.utils import old_person_to_person, person_name, dont_save_queries
2123
from ietf.name.utils import name
2224
from ietf.idtracker.models import InternetDraft, IDInternal, IESGLogin, DocumentComment, PersonOrOrgInfo, Rfc, IESGComment, IESGDiscuss, BallotInfo, Position
@@ -27,6 +29,8 @@
2729

2830
from workflows.models import State as StateOld
2931

32+
ALL_IDS_STATE = "all_ids.state"
33+
3034
import_docs_from = document_name_to_import = document_id_to_import = None
3135
if len(sys.argv) > 1:
3236
if re.search("^\d+$", sys.argv[1]):
@@ -175,6 +179,85 @@ def alias_doc(name, doc):
175179

176180

177181
# helpers
182+
def extract_authors_from_dump(path=None):
    """Collect per-draft author lists from the all_ids.state dump file.

    Each non-comment line is expected to carry the draft name as its second
    space-separated field and a ``docauthors='...;....-..-..'`` token (the
    trailing part is presumably a date -- it is matched but not used).  The
    author list inside that token is split on ", " and every entry must look
    like ``Name <email@host>``.

    A draft is only included when *all* of its authors look reliable: each
    one has an email containing "@", and a cleaned-up name that neither
    contains "VCARD" nor has more than five words.  One bad author entry
    discards the whole draft.

    Parameters:
        path -- file to read; defaults to the module-level ALL_IDS_STATE.

    Returns:
        dict mapping draft name -> list of (name, email) tuples in the order
        they appear in the dump.  Names are text decoded from latin-1.  An
        empty dict is returned (after printing a warning) when the file does
        not exist.
    """
    # Local import keeps this block self-contained; io.open gives the same
    # decoded-text behaviour on Python 2.6+ and Python 3.
    import io

    if path is None:
        path = ALL_IDS_STATE

    authors_re = re.compile(r"docauthors='([^']*);....-..-..'")
    name_email_re = re.compile(r"(.*) <([^>]+)>")
    email_brackets_re = re.compile(r" <[^>]*>")
    comma_re = re.compile(r".*,")
    colon_re = re.compile(r".*:")

    # Known stale addresses -> the address to record instead.
    # NOTE(review): the mapping is applied once, not transitively (e.g.
    # scott.lawrence@nortel.com ends up as scottlawrenc@avaya.com even though
    # that address is itself remapped), and the charliep entry maps to a
    # string holding two comma-separated addresses -- confirm both are
    # intended.
    email_mapping = {
        "barryleiba@computer.org": "barryleiba@gmail.com",
        "greg.daley@eng.monash.edu.au": "gdaley@netstarnetworks.com",
        "radia.perlman@sun.com": "radia@alum.mit.edu",
        "lisa@osafoundation.org": "lisa.dusseault@gmail.com",
        "lisa.dusseault@messagingarchitects.com": "lisa.dusseault@gmail.com",
        "scott.lawrence@nortel.com": "scottlawrenc@avaya.com",
        "charliep@computer.org": "charliep@computer.org, charles.perkins@earthlink.net",
        "yaronf@checkpoint.com": "yaronf.ietf@gmail.com",
        "mary.barnes@nortel.com": "mary.ietf.barnes@gmail.com",
        "scottlawrenc@avaya.com": "xmlscott@gmail.com",
        "henk@ripe.net": "henk@uijterwaal.nl",
        "jonne.soininen@nsn.com": "jonne.soininen@renesasmobile.com",
        "tom.taylor@rogers.com": "tom.taylor.stds@gmail.com",
        "rahul@juniper.net": "raggarwa_1@yahoo.com",
        "dward@juniper.net": "dward@cisco.com",
        "alan.ford@roke.co.uk": "alanford@cisco.com",
    }

    res = {}

    if not os.path.exists(path):
        print("WARNING: proceeding without author information in all_ids.state")
        return res

    # Decode once at the I/O boundary.  latin-1 maps every byte to exactly one
    # code point, so decoding the whole line up front yields the same text as
    # decoding the author field afterwards.  newline="\n" keeps line splitting
    # on "\n" only, without newline translation.
    with io.open(path, encoding="latin-1", newline="\n") as author_source:
        for line in author_source:
            if line.startswith("#"):
                continue

            fields = line.split(" ")
            if len(fields) < 2:
                # Blank or truncated line: there is no draft name to key on,
                # so skip it instead of failing with IndexError.
                continue
            draft_name = fields[1]

            m = authors_re.search(line)
            if not m:
                continue

            # Undo the dump's two escapings of the single quote.
            raw_authors = m.group(1).replace("\\x27", "'").replace("\\'", "'")

            authors = []
            for entry in raw_authors.split(", "):
                n = name_email_re.match(entry)
                if n:
                    author_name, email = n.group(1), n.group(2)
                else:
                    author_name, email = entry, ""

                if "@" not in email:
                    # Missing or malformed email (this also covers the empty
                    # string): the whole draft is considered unreliable.
                    break

                # Drop leftover "<...>" fragments and anything up to and
                # including the last comma / colon, keeping only the tail.
                author_name = email_brackets_re.sub("", author_name)
                author_name = comma_re.sub("", author_name)
                author_name = colon_re.sub("", author_name)
                author_name = author_name.strip()

                if "VCARD" in author_name or len(author_name.split()) > 5:
                    break

                authors.append((author_name, email_mapping.get(email, email)))
            else:
                # Reached only when no author entry was rejected above.
                res[draft_name] = authors

    return res
258+
259+
author_dump = extract_authors_from_dump()
260+
178261
def save_docevent(doc, event, comment):
179262
event.time = comment.datetime()
180263
event.by = iesg_login_to_person(comment.created_by)
@@ -885,13 +968,45 @@ def import_from_idinternal(d, idinternal):
885968

886969
# authors
887970
d.authors.clear()
888-
for i, a in enumerate(o.authors.all().select_related("person").order_by('author_order', 'person')):
889-
try:
890-
e = Email.objects.get(address__iexact=a.email() or a.person.email()[1] or u"unknown-email-%s" % person_name(a.person).replace(" ", "-"))
891-
# renumber since old numbers may be a bit borked
971+
972+
authors_from_dump = author_dump.get(d.name)
973+
if authors_from_dump:
974+
for i, a in enumerate(authors_from_dump):
975+
name, email = a
976+
try:
977+
e = Email.objects.get(address__iexact=email)
978+
except Email.DoesNotExist:
979+
e = Email(address=email)
980+
981+
ps = Person.objects.filter(alias__name=name)
982+
if ps:
983+
p = ps[0]
984+
else:
985+
_, first, _, last, _ = name_parts(name)
986+
first = first.replace(".", "")
987+
988+
ps = Person.objects.filter(name__regex=u".*%s.*%s.*" % (first, last))
989+
if len(ps) == 1:
990+
p = ps[0]
991+
else:
992+
from ietf.utils import unaccent
993+
p = Person.objects.create(name=name, ascii=unaccent.asciify(name))
994+
Alias.objects.create(name=p.name, person=p)
995+
if p.ascii != p.name:
996+
Alias.objects.create(name=p.ascii, person=p)
997+
998+
e.person = p
999+
e.save()
1000+
8921001
DocumentAuthor.objects.create(document=d, author=e, order=i)
893-
except Email.DoesNotExist:
894-
print "SKIPPED author", unicode(a.person).encode('utf-8')
1002+
else:
1003+
for i, a in enumerate(o.authors.all().select_related("person").order_by('author_order', 'person')):
1004+
try:
1005+
e = Email.objects.get(address__iexact=a.email() or a.person.email()[1] or u"unknown-email-%s" % person_name(a.person).replace(" ", "-"))
1006+
# renumber since old numbers may be a bit borked
1007+
DocumentAuthor.objects.create(document=d, author=e, order=i)
1008+
except Email.DoesNotExist:
1009+
print "SKIPPED author", unicode(a.person).encode('utf-8')
8951010

8961011
# clear any already imported events
8971012
d.docevent_set.all().delete()

0 commit comments

Comments
 (0)