Skip to content

Commit 644c7fc

Browse files
committed
Fixed import problems (all drafts and RFCs from rfc_index_mirror can now be imported)
- Legacy-Id: 2739
1 parent 754469c commit 644c7fc

4 files changed

Lines changed: 177 additions & 52 deletions

File tree

redesign/doc/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ class DocAlias(models.Model):
146146
to by RFC number, primarily, after achieving RFC status.
147147
"""
148148
document = models.ForeignKey(Document)
149-
name = models.CharField(max_length=255)
149+
name = models.CharField(max_length=255, db_index=True)
150150
def __unicode__(self):
151151
return "%s-->%s" % (self.name, self.document.name)
152152
document_link = admin_link("document")

redesign/doc/proxy.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,9 @@ def expired_tombstone(self):
162162
#idinternal = FKAsOneToOne('idinternal', reverse=True, query=models.Q(rfc_flag = 0))
163163
@property
164164
def idinternal(self):
165-
print self.iesg_state
166-
return self if self.iesg_state else None
165+
# since IDInternal is now merged into the document, we guess whether
166+
# one existed from the IESG state or a recorded ballot position
167+
return self if self.iesg_state or self.latest_event(type="changed_ballot_position") else None
167168

168169
# reverse relationship
169170
@property
@@ -646,7 +647,7 @@ def author_order(self):
646647
return self.order
647648

648649
def email(self):
649-
return self.author.address
650+
return None if self.author.address.startswith("unknown-email") else self.author.address
650651

651652
def final_author_order(self):
652653
return self.order

redesign/import-document-state.py

Lines changed: 159 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from redesign.doc.models import *
1515
from redesign.group.models import *
1616
from redesign.name.models import *
17-
from ietf.idtracker.models import InternetDraft, IDInternal, IESGLogin, DocumentComment, PersonOrOrgInfo, Rfc, IESGComment, IESGDiscuss, BallotInfo
17+
from ietf.idtracker.models import InternetDraft, IDInternal, IESGLogin, DocumentComment, PersonOrOrgInfo, Rfc, IESGComment, IESGDiscuss, BallotInfo, Position
1818
from ietf.idrfc.models import RfcIndex, DraftVersions
1919

2020
import sys
@@ -125,9 +125,15 @@ def alias_doc(name, doc):
125125
'Abstain': name(BallotPositionName, 'abstain', 'Abstain'),
126126
'Discuss': name(BallotPositionName, 'discuss', 'Discuss'),
127127
'Recuse': name(BallotPositionName, 'recuse', 'Recuse'),
128-
'Undefined': name(BallotPositionName, 'norecord', 'No record'),
129-
None: name(BallotPositionName, 'norecord', 'No record'),
128+
'No Record': name(BallotPositionName, 'norecord', 'No record'),
130129
}
130+
ballot_position_mapping["no"] = ballot_position_mapping['No Objection']
131+
ballot_position_mapping["yes"] = ballot_position_mapping['Yes']
132+
ballot_position_mapping["discuss"] = ballot_position_mapping['Discuss']
133+
ballot_position_mapping["abstain"] = ballot_position_mapping['Abstain']
134+
ballot_position_mapping["recuse"] = ballot_position_mapping['Recuse']
135+
ballot_position_mapping[None] = ballot_position_mapping["No Record"]
136+
ballot_position_mapping["Undefined"] = ballot_position_mapping["No Record"]
131137

132138
substate_mapping = {
133139
"External Party": name(DocInfoTagName, 'extpty', "External Party", 'The document is awaiting review or input from an external party (i.e, someone other than the shepherding AD, the authors, or the WG). See the "note" field for more details on who has the action.'),
@@ -203,9 +209,14 @@ def iesg_login_to_email(l):
203209

204210
# regexps for parsing document comments
205211

206-
date_re_str = "(?P<year>[0-9][0-9][0-9][0-9])-(?P<month>[0-9][0-9])-(?P<day>[0-9][0-9])"
212+
date_re_str = "(?P<year>[0-9][0-9][0-9][0-9])-(?P<month>[0-9][0-9]?)-(?P<day>[0-9][0-9]?)"
207213
def date_in_match(match):
208-
return datetime.date(int(match.group('year')), int(match.group('month')), int(match.group('day')))
214+
y = int(match.group('year'))
215+
m = int(match.group('month'))
216+
d = int(match.group('day'))
217+
if d == 35: # borked status date
218+
d = 25
219+
return datetime.date(y, m, d)
209220

210221
re_telechat_agenda = re.compile(r"(Placed on|Removed from) agenda for telechat(| - %s) by" % date_re_str)
211222
re_telechat_changed = re.compile(r"Telechat date (was|has been) changed to (<b>)?%s(</b>)? from" % date_re_str)
@@ -238,7 +249,7 @@ def import_from_idinternal(d, idinternal):
238249
# extract events
239250
last_note_change_text = ""
240251

241-
for c in idinternal.documentcomment_set.order_by('date', 'time', 'id'):
252+
for c in DocumentComment.objects.filter(document=idinternal.draft_id).order_by('date', 'time', 'id'):
242253
handled = False
243254

244255
# telechat agenda schedulings
@@ -255,7 +266,7 @@ def import_from_idinternal(d, idinternal):
255266
# ballot issued
256267
match = re_ballot_issued.search(c.comment_text)
257268
if match:
258-
e = Text()
269+
e = Event()
259270
e.type = "sent_ballot_announcement"
260271
save_event(d, e, c)
261272

@@ -276,19 +287,41 @@ def import_from_idinternal(d, idinternal):
276287
# ballot positions
277288
match = re_ballot_position.search(c.comment_text)
278289
if match:
279-
position = match.group('position') or match.group('position2')
290+
position = ballot_position_mapping[match.group('position') or match.group('position2')]
280291
ad_name = match.group('for') or match.group('for2') or match.group('by') # some of the old positions don't specify who it's for, in that case assume it's "by", the person who entered the position
281292
ad_first, ad_last = ad_name.split(' ')
293+
login = IESGLogin.objects.filter(first_name=ad_first, last_name=ad_last).order_by('user_level')[0]
294+
if login.user_level == IESGLogin.SECRETARIAT_LEVEL:
295+
# now we're in trouble: a secretariat person isn't an
296+
# AD, so instead try to find a position object that
297+
# matches and that we haven't taken yet
298+
positions = Position.objects.filter(ballot=idinternal.ballot)
299+
if position.slug == "noobj":
300+
positions = positions.filter(noobj=1)
301+
elif position.slug == "yes":
302+
positions = positions.filter(yes=1)
303+
elif position.slug == "abstain":
304+
positions = positions.filter(models.Q(abstain=1)|models.Q(abstain=2))
305+
elif position.slug == "recuse":
306+
positions = positions.filter(recuse=1)
307+
elif position.slug == "discuss":
308+
positions = positions.filter(models.Q(discuss=1)|models.Q(discuss=2))
309+
assert position.slug != "norecord"
310+
311+
for p in positions:
312+
if not d.event_set.filter(type="changed_ballot_position", ballotposition__pos=position, ballotposition__ad=iesg_login_to_email(p.ad)):
313+
login = p.ad
314+
break
282315

283316
e = BallotPosition()
284317
e.type = "changed_ballot_position"
285-
e.ad = iesg_login_to_email(IESGLogin.objects.get(first_name=ad_first, last_name=ad_last))
286-
last_pos = d.latest_event(type="changed_ballot_position", ballotposition__ad=e.ad)
287-
e.pos = ballot_position_mapping[position]
288-
e.discuss = last_pos.ballotposition.discuss if last_pos else ""
289-
e.discuss_time = last_pos.ballotposition.discuss_time if last_pos else None
290-
e.comment = last_pos.ballotposition.comment if last_pos else ""
291-
e.comment_time = last_pos.ballotposition.comment_time if last_pos else None
318+
e.ad = iesg_login_to_email(login)
319+
last_pos = d.latest_event(BallotPosition, type="changed_ballot_position", ad=e.ad)
320+
e.pos = position
321+
e.discuss = last_pos.discuss if last_pos else ""
322+
e.discuss_time = last_pos.discuss_time if last_pos else None
323+
e.comment = last_pos.comment if last_pos else ""
324+
e.comment_time = last_pos.comment_time if last_pos else None
292325
save_event(d, e, c)
293326
handled = True
294327

@@ -394,6 +427,7 @@ def import_from_idinternal(d, idinternal):
394427
if not handled:
395428
unhandled_lines = []
396429
for line in c.comment_text.split("<br>"):
430+
line = line.replace("&nbsp;", " ")
397431
# status date changed
398432
match = re_status_date_changed.search(line)
399433
if match:
@@ -442,7 +476,8 @@ def import_from_idinternal(d, idinternal):
442476
c.comment_text = "<br>".join(unhandled_lines)
443477

444478
if c.comment_text:
445-
print "COULDN'T HANDLE multi-line comment %s '%s'" % (c.id, c.comment_text.replace("\n", " ").replace("\r", "")[0:80])
479+
if "Due date has been changed" not in c.comment_text:
480+
print "COULDN'T HANDLE multi-line comment %s '%s'" % (c.id, c.comment_text.replace("\n", " ").replace("\r", "")[0:80])
446481

447482
# all others are added as comments
448483
if not handled:
@@ -458,18 +493,32 @@ def import_from_idinternal(d, idinternal):
458493
"IANA has questions",
459494
"IANA comments",
460495
"IANA Comments",
461-
"IANA Evaluation Comments",
462-
"Published as RFC",
496+
"IANA Evaluation Comment",
497+
"IANA Last Call Comments",
498+
"ublished as RFC",
499+
"A new comment added",
500+
"Due date has been changed",
501+
"Due&nbsp;date&nbsp;has&nbsp;been&nbsp;changed",
502+
"by&nbsp;<b>",
503+
"AD-review comments",
504+
"IANA Last Call",
505+
"Subject:",
506+
"Merged with",
463507
]
464508
for t in typical_comments:
465509
if t in c.comment_text:
466510
handled = True
467511
break
468512

469513
if not handled:
470-
print "couldn't handle comment %s '%s'" % (c.id, c.comment_text.replace("\n", " ").replace("\r", "")[0:80])
514+
print (u"COULDN'T HANDLE comment %s '%s' by %s" % (c.id, c.comment_text.replace("\n", " ").replace("\r", "")[0:80], c.created_by)).encode("utf-8")
471515

472-
made_up_date = d.latest_event().time + datetime.timedelta(seconds=1)
516+
e = d.latest_event()
517+
if e:
518+
made_up_date = e.time
519+
else:
520+
made_up_date = d.time
521+
made_up_date += datetime.timedelta(seconds=1)
473522

474523
e = d.latest_event(Status, type="changed_status_date")
475524
status_date = e.date if e else None
@@ -508,6 +557,70 @@ def import_from_idinternal(d, idinternal):
508557
ballot = None
509558

510559
if ballot:
560+
e = d.event_set.filter(type__in=("changed_ballot_position", "sent_ballot_announcement", "requested_last_call")).order_by('-time')[:1]
561+
if e:
562+
position_date = e[0].time + datetime.timedelta(seconds=1)
563+
else:
564+
position_date = made_up_date
565+
566+
# make sure we got all the positions
567+
existing = BallotPosition.objects.filter(doc=d, type="changed_ballot_position").order_by("-time")
568+
569+
for p in Position.objects.filter(ballot=ballot):
570+
found = False
571+
ad = iesg_login_to_email(p.ad)
572+
if p.noobj > 0:
573+
pos = ballot_position_mapping["No Objection"]
574+
elif p.yes > 0:
575+
pos = ballot_position_mapping["Yes"]
576+
elif p.abstain > 0:
577+
pos = ballot_position_mapping["Abstain"]
578+
elif p.recuse > 0:
579+
pos = ballot_position_mapping["Recuse"]
580+
elif p.discuss > 0:
581+
pos = ballot_position_mapping["Discuss"]
582+
else:
583+
pos = ballot_position_mapping[None]
584+
for x in existing:
585+
if x.ad == ad and x.pos == pos:
586+
found = True
587+
break
588+
589+
if not found:
590+
e = BallotPosition()
591+
e.type = "changed_ballot_position"
592+
e.doc = d
593+
e.time = position_date
594+
e.by = system_email
595+
e.ad = ad
596+
last_pos = d.latest_event(BallotPosition, type="changed_ballot_position", ad=e.ad)
597+
e.pos = pos
598+
e.discuss = last_pos.discuss if last_pos else ""
599+
e.discuss_time = last_pos.discuss_time if last_pos else None
600+
e.comment = last_pos.comment if last_pos else ""
601+
e.comment_time = last_pos.comment_time if last_pos else None
602+
if last_pos:
603+
e.desc = "[Ballot Position Update] Position for %s has been changed to %s from %s" % (ad.get_name(), pos.name, last_pos.pos.name)
604+
else:
605+
e.desc = "[Ballot Position Update] New position, %s, has been recorded for %s" % (pos.name, ad.get_name())
606+
e.save()
607+
608+
# make sure we got the ballot issued event
609+
if ballot.ballot_issued and not d.event_set.filter(type="sent_ballot_announcement"):
610+
position = d.event_set.filter(type=("changed_ballot_position")).order_by('time')[:1]
611+
if position:
612+
sent_date = position[0].time
613+
else:
614+
sent_date = made_up_date
615+
616+
e = Event()
617+
e.type = "sent_ballot_announcement"
618+
e.doc = d
619+
e.time = sent_date
620+
e.by = system_email
621+
e.desc = "Ballot has been issued"
622+
e.save()
623+
511624
# make sure the comments and discusses are updated
512625
positions = list(BallotPosition.objects.filter(doc=d).order_by("-time"))
513626
for c in IESGComment.objects.filter(ballot=idinternal.ballot):
@@ -578,8 +691,17 @@ def import_from_idinternal(d, idinternal):
578691
if document_name_to_import:
579692
all_drafts = all_drafts.filter(filename=document_name_to_import)
580693
#all_drafts = all_drafts[all_drafts.count() - 1000:]
581-
582-
for o in all_drafts:
694+
695+
# prevent memory from leaking: with the debug setting on, Django appends every executed query to connection.queries
696+
from django.db import connection
697+
class DummyQueries(object):
698+
def append(self, x):
699+
pass
700+
connection.queries = DummyQueries()
701+
702+
for index, o in enumerate(all_drafts.iterator()):
703+
print "importing", o.filename, index
704+
583705
try:
584706
d = Document.objects.get(name=o.filename)
585707
except Document.DoesNotExist:
@@ -620,14 +742,13 @@ def import_from_idinternal(d, idinternal):
620742
d.authors.clear()
621743
for i, a in enumerate(o.authors.all().select_related("person").order_by('author_order', 'person')):
622744
try:
623-
e = Email.objects.get(address=a.person.email()[1])
745+
e = Email.objects.get(address=a.person.email()[1] or u"unknown-email-%s-%s" % (a.person.first_name, a.person.last_name))
624746
# renumber since old numbers may be a bit borked
625747
DocumentAuthor.objects.create(document=d, author=e, order=i)
626748
except Email.DoesNotExist:
627749
print "SKIPPED author", unicode(a.person).encode('utf-8')
628750

629-
# clear any already imported events as the event importer isn't
630-
# clever enough to do a diff
751+
# clear any already imported events
631752
d.event_set.all().delete()
632753

633754
if o.idinternal:
@@ -646,7 +767,7 @@ def import_from_idinternal(d, idinternal):
646767
# we don't have time information in this source, so
647768
# hack the seconds to include the revision to ensure
648769
# they're ordered correctly
649-
e.time = datetime.datetime.combine(v.revision_date, datetime.time(0, 0, int(v.revision)))
770+
e.time = datetime.datetime.combine(v.revision_date, datetime.time(0, 0, 0)) + datetime.timedelta(seconds=int(v.revision))
650771
e.by = system_email
651772
e.doc = d
652773
e.desc = "New version available"
@@ -658,7 +779,7 @@ def import_from_idinternal(d, idinternal):
658779
# information completely
659780

660781
# make sure last decision is recorded
661-
e = d.latest_event(Event, type__in=("iesg_approved", "iesg_disapproved"))
782+
e = d.latest_event(type__in=("iesg_approved", "iesg_disapproved"))
662783
decision_date = e.time.date() if e else None
663784
if o.b_approve_date != decision_date:
664785
disapproved = o.idinternal and o.idinternal.dnp
@@ -697,9 +818,6 @@ def import_from_idinternal(d, idinternal):
697818

698819
# the RFC-related attributes are imported when we handle the RFCs below
699820

700-
print "imported", d.name, " - ", d.iesg_state
701-
702-
703821
# now process RFCs
704822

705823
def get_or_create_rfc_document(rfc_number):
@@ -744,9 +862,10 @@ def get_or_create_rfc_document(rfc_number):
744862
# did process
745863
all_rfcs = all_rfcs.filter(rfc_number__in=set(d.rfc_number for d in all_drafts if d.rfc_number))
746864

747-
for o in all_rfcs:
865+
for index, o in enumerate(all_rfcs.iterator()):
866+
print "importing rfc%s" % o.rfc_number, index
867+
748868
d, d_alias = get_or_create_rfc_document(o.rfc_number)
749-
#if d.name.startswith('rfc'):
750869
d.time = datetime.datetime.now()
751870
d.title = o.title
752871
d.std_level = std_level_mapping[o.current_status]
@@ -758,16 +877,18 @@ def get_or_create_rfc_document(rfc_number):
758877
rfcs = Rfc.objects.filter(rfc_number=o.rfc_number).select_related()
759878
if rfcs:
760879
r = rfcs[0]
761-
d.intended_std_level = intended_std_level_mapping[r.intended_status.status]
880+
l = intended_std_level_mapping[r.intended_status.status]
881+
if l:
882+
d.intended_std_level = l
762883
d.save()
763884

764885
# a few RFCs have an IDInternal so we may have to import the
765886
# events and attributes
766887
internals = IDInternal.objects.filter(rfc_flag=1, draft=o.rfc_number)
767888
if internals:
768889
if d.name.startswith("rfc"):
769-
# clear any already imported events as the event importer isn't
770-
# clever enough to do a diff
890+
# clear any already imported events; we don't do it for
891+
# drafts as they've already been cleared above
771892
d.event_set.all().delete()
772893
import_from_idinternal(d, internals[0])
773894

@@ -780,6 +901,9 @@ def get_or_create_rfc_document(rfc_number):
780901

781902
# import obsoletes/updates
782903
def make_relation(other_rfc, rel_type, reverse):
904+
if other_rfc.startswith("NIC") or other_rfc.startswith("IEN") or other_rfc.startswith("STD") or other_rfc.startswith("RTR"):
905+
return # we currently have no good way of importing these
906+
783907
other_number = int(other_rfc.replace("RFC", ""))
784908
other, other_alias = get_or_create_rfc_document(other_number)
785909
if reverse:
@@ -806,5 +930,3 @@ def make_relation(other_rfc, rel_type, reverse):
806930
sync_tag(d, o.has_errata, tag_has_errata)
807931

808932
# FIXME: import RFC authors?
809-
810-
print "imported", d_alias.name, " - ", d.rfc_state

0 commit comments

Comments
 (0)