Skip to content

Commit 5282bd1

Browse files
committed
Add sync scripts for reading from IANA changes API, reading from the
protocols page (to see when references to newly published RFCs have been updated) and parsing IANA review emails to be included as comments - Legacy-Id: 4850
1 parent 9fdcbc3 commit 5282bd1

4 files changed

Lines changed: 388 additions & 0 deletions

File tree

ietf/bin/iana-changes-updates

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/usr/bin/env python
# Fetch changes from the IANA changes API for a period of time and apply
# them to the document history, logging the outcome to syslog.
#
# The period is given with --from/--to ("YYYY-MM-DD HH:MM:SS"); without
# options it covers a little less than the last 23 hours.

import os, sys, re, json, datetime, optparse
import syslog

# boilerplate
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
sys.path = [ basedir ] + sys.path

from ietf import settings
from django.core import management
management.setup_environ(settings)


from optparse import OptionParser

parser = OptionParser()
parser.add_option("-f", "--from", dest="start",
                  help="Start time, defaults to a little less than 23 hours ago", metavar="YYYY-MM-DD HH:MM:SS")
parser.add_option("-t", "--to", dest="end",
                  help="End time, defaults to 23 hours later than from", metavar="YYYY-MM-DD HH:MM:SS")
parser.add_option("", "--no-email", dest="send_email", default=True, action="store_false",
                  help="Skip sending emails")

options, args = parser.parse_args()

# compensate to avoid we ask for something that happened now and then
# don't get it back because our request interval is slightly off
CLOCK_SKEW_COMPENSATION = 5 # seconds

# actually the interface accepts 24 hours, but then we get into
# trouble with daylights savings - meh
MAX_INTERVAL_ACCEPTED_BY_IANA = datetime.timedelta(hours=23)


# the compensation is a plain number of seconds, so it has to be turned
# into a timedelta before it can take part in datetime arithmetic
start = (datetime.datetime.now() - datetime.timedelta(hours=23)
         + datetime.timedelta(seconds=CLOCK_SKEW_COMPENSATION))
if options.start:
    start = datetime.datetime.strptime(options.start, "%Y-%m-%d %H:%M:%S")

end = start + datetime.timedelta(hours=23)
if options.end:
    end = datetime.datetime.strptime(options.end, "%Y-%m-%d %H:%M:%S")

syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_LOCAL0)


from ietf.sync.iana import *

syslog.syslog("Updating history log with new changes from IANA from %s, period %s - %s" % (CHANGES_URL, start, end))

t = start
while t < end:
    # the IANA server doesn't allow us to fetch more than a certain
    # period, so loop over the requested period and make multiple
    # requests if necessary

    text = fetch_changes_json(CHANGES_URL, t, min(end, t + MAX_INTERVAL_ACCEPTED_BY_IANA))
    changes = parse_changes_json(text)
    added_events, warnings = update_history_with_changes(changes, send_email=options.send_email)

    for e in added_events:
        syslog.syslog("Added event for %s %s: %s" % (e.doc_id, e.time, e.desc))

    for w in warnings:
        syslog.syslog("WARNING: %s" % w)

    t += MAX_INTERVAL_ACCEPTED_BY_IANA

ietf/bin/iana-protocols-updates

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env python
# Scan the IANA protocols page for RFC references and note in the history
# of those RFCs that IANA now refers to them.

import os, sys, re, json, datetime
import syslog

# log under the name of this script, tagging each entry with the process id
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_LOCAL0)

# boilerplate: put the directory two levels above this script's directory
# first on the import path so that the ietf package can be imported
script_dir = os.path.dirname(__file__)
basedir = os.path.abspath(os.path.join(script_dir, "../.."))
sys.path = [basedir] + sys.path

# set up the Django environment before the sync code is imported
from ietf import settings
from django.core import management
management.setup_environ(settings)


from ietf.sync.iana import *
18+
19+
def chunks(l, n):
    """Return a generator over consecutive slices of list l, each holding
    at most n items (only the last slice may be shorter)."""
    starts = xrange(0, len(l), n)
    return (l[begin:begin + n] for begin in starts)
22+
23+
syslog.syslog("Updating history log with new RFC entries from IANA protocols page %s" % PROTOCOLS_URL)

# FIXME: this needs to be the date where this tool is first deployed
rfc_must_published_later_than = datetime.datetime(2012, 8, 30, 0, 0, 0)

page_text = fetch_protocol_page(PROTOCOLS_URL)
referenced_rfcs = parse_protocol_page(page_text)

# hand the RFC names over 100 at a time rather than in one big lookup
for batch in chunks(referenced_rfcs, 100):
    for doc in update_rfc_log_from_protocol_page(batch, rfc_must_published_later_than):
        syslog.syslog("Added history entry for %s" % doc.display_name())

ietf/bin/iana-review-email

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env python
# Read a single IANA review email from standard input, extract the review
# comment from it and store that comment on the reviewed document.

import os, sys, re, json, datetime, optparse
import syslog

# boilerplate: put the directory two levels above this script's directory
# first on the import path so that the ietf package can be imported
script_dir = os.path.dirname(__file__)
basedir = os.path.abspath(os.path.join(script_dir, "../.."))
sys.path = [basedir] + sys.path

# set up the Django environment before the sync code is imported
from ietf import settings
from django.core import management
management.setup_environ(settings)


syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_LOCAL0)

from ietf.sync.iana import *

raw_email = sys.stdin.read()

syslog.syslog("Reading IANA review email")

review = parse_review_email(raw_email)
add_review_comment(*review)

# the third element is the person the review is attributed to; the
# "(System)" person is used when the sender could not be matched
reviewer = review[2]
if reviewer.name == "(System)":
    syslog.syslog("WARNING: person responsible for email does not have a IANA role")

ietf/sync/iana.py

Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
import re, urllib2, json, email
2+
3+
from django.utils.http import urlquote
4+
5+
from ietf.doc.models import *
6+
from ietf.doc.utils import add_state_change_event
7+
from ietf.person.models import *
8+
from ietf.idrfc.mails import email_owner, email_state_changed, email_authors
9+
from ietf.utils.timezone import *
10+
11+
# IANA page that is scanned for "RFC NNNN" references
PROTOCOLS_URL = "http://www.iana.org/protocols/"
12+
# endpoint of the IANA changes API; a ?start=...&end=... query is appended to it
# NOTE(review): the host name contains "dev" and uses port 8080 - confirm
# this is the endpoint intended for production use
CHANGES_URL = "http://datatracker.dev.icann.org:8080/data-tracker/changes"
13+
14+
def fetch_protocol_page(url):
    """Download the page at url and return its content as a string.

    Errors raised by urllib2 while opening or reading the URL are
    passed on to the caller."""
    # fetch the URL we were given; this used to ignore the argument
    # and always read PROTOCOLS_URL
    f = urllib2.urlopen(url)
    try:
        return f.read()
    finally:
        # release the connection even if reading fails
        f.close()
19+
20+
def parse_protocol_page(text):
    """Collect the RFCs mentioned in the text of the IANA protocols page.

    Every "RFC <digits>" occurrence is turned into an "rfc<digits>"
    document name. Duplicates are dropped; the order of the returned
    list is unspecified."""
    prefix_length = len("RFC ")
    names = set("rfc" + reference[prefix_length:]
                for reference in re.findall('RFC [0-9]+', text))
    return list(names)
29+
30+
def update_rfc_log_from_protocol_page(rfc_names, rfc_must_published_later_than):
    """Note in the history of RFCs that IANA is now referencing them.

    rfc_names -- document alias names to look at (e.g. "rfc1234")
    rfc_must_published_later_than -- cutoff time; only documents with a
        "published_rfc" event at or after it are considered

    Documents that already have an "rfc_in_iana_registry" event are
    skipped. Returns the list of documents that got a new event."""
    system_person = Person.objects.get(name="(System)")

    candidates = Document.objects.filter(docalias__name__in=rfc_names)
    # leave out those where the notice has been added already
    candidates = candidates.exclude(docevent__type="rfc_in_iana_registry")
    # only take those that were published after cutoff since we
    # have a big bunch of old RFCs that we unfortunately don't have data for
    candidates = candidates.filter(docevent__type="published_rfc",
                                   docevent__time__gte=rfc_must_published_later_than)

    updated = []
    for rfc in candidates.distinct():
        notice = DocEvent(doc=rfc)
        notice.type = "rfc_in_iana_registry"
        notice.by = system_person
        notice.desc = "IANA registries were updated to include %s" % rfc.display_name()
        notice.save()

        updated.append(rfc)

    return updated
53+
54+
55+
56+
def fetch_changes_json(url, start, end):
    """Ask the IANA changes API at url for the changes between start and
    end and return the raw response text.

    start and end are datetimes; they are passed through
    local_timezone_to_utc and sent as "YYYY-MM-DD HH:MM:SS" query
    parameters. Errors raised by urllib2 are passed on to the caller."""
    time_format = "%Y-%m-%d %H:%M:%S"
    url += "?start=%s&end=%s" % (urlquote(local_timezone_to_utc(start).strftime(time_format)),
                                 urlquote(local_timezone_to_utc(end).strftime(time_format)))
    f = urllib2.urlopen(url)
    try:
        return f.read()
    finally:
        # release the connection even if reading fails
        f.close()
63+
64+
def parse_changes_json(text):
    """Decode a response from the IANA changes API into a list of changes.

    Each change is a dict that must contain the keys 'doc', 'type' and
    'time'. The 'doc' value is stripped of surrounding whitespace and of
    a leading internet-drafts URL. The list is returned sorted by 'time',
    oldest first.

    Raises Exception if the response carries an "error" key or if a
    change lacks one of the required keys."""
    draft_url_prefix = "http://www.ietf.org/internet-drafts/"
    required_fields = ['doc', 'type', 'time']

    response = json.loads(text)
    if "error" in response:
        raise Exception("IANA server returned error: %s" % response["error"])

    changes = response["changes"]

    for change in changes:
        # do some rudimentary validation
        for field in required_fields:
            if field not in change:
                raise Exception('Error in response: Field %s missing in input: %s - %s' % (field, json.dumps(change), json.dumps(changes)))

        # a little bit of cleaning
        doc = change["doc"].strip()
        if doc.startswith(draft_url_prefix):
            doc = doc[len(draft_url_prefix):]
        change["doc"] = doc

    # make sure we process oldest entries first
    changes.sort(key=lambda change: change["time"])

    return changes
87+
88+
def update_history_with_changes(changes, send_email=True):
    """Take parsed changes from IANA and apply them. Note that we
    expect to get these in chronologically sorted, otherwise the
    change descriptions generated may not be right.

    changes -- list of dicts with at least the keys 'doc', 'type' and
        'time'; entries of type "iana_state" or "iana_review" must
        also have a 'state' key, other types are ignored
    send_email -- whether to send notification mails when the state of
        a document is changed

    Returns a tuple (added_events, warnings): the state change events
    that were added and a list of warning strings about entries that
    were skipped."""

    # build up state lookup
    states = {}

    slookup = dict((s.slug, s)
                   for s in State.objects.filter(type=StateType.objects.get(slug="draft-iana-action")))
    states["action"] = {
        "": slookup["newdoc"],
        "In Progress": slookup["inprog"],
        "Open": slookup["inprog"],
        "pre-approval In Progress": slookup["inprog"],
        "Waiting on Authors": slookup["waitauth"],
        "Author": slookup["waitauth"],
        "Waiting on ADs": slookup["waitad"],
        "Waiting on AD": slookup["waitad"],
        "AD": slookup["waitad"],
        "Waiting on WGC": slookup["waitwgc"],
        "WGC": slookup["waitwgc"],
        "Waiting on RFC-Editor": slookup["waitrfc"],
        "Waiting on RFC Editor": slookup["waitrfc"],
        "RFC-Editor": slookup["waitrfc"],
        "RFC-Ed-ACK": slookup["rfcedack"],
        "RFC-Editor-ACK": slookup["rfcedack"],
        "Completed": slookup["rfcedack"],
        "On Hold": slookup["onhold"],
        "No IC": slookup["noic"],
        }

    slookup = dict((s.slug, s)
                   for s in State.objects.filter(type=StateType.objects.get(slug="draft-iana-review")))
    states["review"] = {
        "IANA Review Needed": slookup["need-rev"],
        "IANA OK - Actions Needed": slookup["ok-act"],
        "IANA OK - No Actions Needed": slookup["ok-noact"],
        "IANA Not OK": slookup["not-ok"],
        "Version Changed - Review Needed": slookup["changed"],
        }

    # so it turns out IANA has made a mistake and are including some
    # wrong states, we'll have to skip those
    # (the list has both "Watiing on Last Call" and "Waiting on Last Call")
    wrong_action_states = ("Waiting on Reviewer", "Review Complete", "Last Call",
                           "Last Call - Questions", "Evaluation", "Evaluation - Questions",
                           "With Reviewer", "IESG Notification Received", "Watiing on Last Call",
                           "IANA Comments Submitted", "Waiting on Last Call")

    system = Person.objects.get(name="(System)")

    added_events = []
    warnings = []

    for c in changes:
        docname = c['doc']
        timestamp = datetime.datetime.strptime(c["time"], "%Y-%m-%d %H:%M:%S")
        timestamp = utc_to_local_timezone(timestamp) # timestamps are in UTC

        if c['type'] not in ("iana_state", "iana_review"):
            # other kinds of changes are not handled here
            continue

        if c['type'] == "iana_state":
            kind = "action"

            if c["state"] in wrong_action_states:
                warnings.append("Wrong action state '%s' encountered in changes from IANA" % c["state"])
                continue
        else:
            kind = "review"

        if c["state"] not in states[kind]:
            # reported through the returned warnings only; this used
            # to be printed on stdout as well
            warnings.append("Unknown IANA %s state %s (%s)" % (kind, c["state"], timestamp))
            continue

        state = states[kind][c["state"]]
        state_type = "draft-iana-%s" % kind

        # NOTE(review): this lookup is not restricted to the document, so
        # an identical change to another document at the same time makes
        # us skip this one - confirm whether that is intended
        e = StateDocEvent.objects.filter(type="changed_state", time=timestamp,
                                         state_type=state_type, state=state)
        if e:
            # already recorded
            continue

        try:
            doc = Document.objects.get(docalias__name=docname)
        except Document.DoesNotExist:
            warnings.append("Document %s not found" % docname)
            continue

        # the naive way of extracting prev_state here means
        # that we assume these changes are cronologically
        # applied
        prev_state = doc.get_state(state_type)
        e = add_state_change_event(doc, system, prev_state, state, timestamp)

        added_events.append(e)

        # only touch the current state of the document if there is no
        # later state change of this type already
        # NOTE(review): the original indentation was lost; it is assumed
        # that the mails belong to this branch and that the time update
        # below does not - confirm against the repository
        if not StateDocEvent.objects.filter(doc=doc, time__gt=timestamp, state_type=state_type):
            save_document_in_history(doc)
            doc.set_state(state)

            if send_email:
                email_state_changed(None, doc, "IANA %s state changed to %s" % (kind, state.name))
                email_owner(None, doc, doc.ad, system, "IANA %s state changed to %s" % (kind, state.name))

        if doc.time < timestamp:
            doc.time = timestamp
            doc.save()

    return added_events, warnings
195+
196+
197+
def parse_review_email(text):
    """Extract the IANA review comment and its metadata from a raw email.

    text -- the complete message (headers and body) as a string

    Returns a tuple (doc_name, review_time, by, comment):

    doc_name -- the name found between "<" and ">" in the Subject
        header, lowercased and with a three character file extension
        and a two digit revision stripped off; "" if there is none
    review_time -- the Date header passed through
        email_time_to_local_timezone, or the current time if the
        message has no Date header
    by -- the Person with a role in the "iana" group whose alias
        matches the quoted name in the From header, otherwise the
        "(System)" person
    comment -- the body text between the "(BEGIN IANA LAST CALL
        COMMENTS)" and "(END IANA LAST CALL COMMENTS)" markers, without
        a leading "IESG:" and without the closing "Thanks," and what
        follows it; a missing marker is treated as the start or the
        end of the body respectively"""
    msg = email.message_from_string(text)

    # doc
    doc_name = ""
    # a missing header reads as None, which re.search cannot take
    m = re.search(r"<([^>]+)>", msg["Subject"] or "")
    if m:
        doc_name = m.group(1).lower()
        if re.search(r"\.\w{3}$", doc_name): # strip off extension
            doc_name = doc_name[:-4]

        if re.search(r"-\d{2}$", doc_name): # strip off revision
            doc_name = doc_name[:-3]

    # date
    review_time = datetime.datetime.now()
    if "Date" in msg:
        review_time = email_time_to_local_timezone(msg["Date"])

    # by
    by = None
    m = re.search(r"\"(.*)\"", msg["From"] or "")
    if m:
        name = m.group(1).strip()
        if name.endswith(" via RT"):
            name = name[:-len(" via RT")]

        try:
            by = Person.objects.get(alias__name=name, role__group__acronym="iana")
        except Person.DoesNotExist:
            # unknown sender, fall back to the system person below
            pass

    if not by:
        by = Person.objects.get(name="(System)")

    # comment
    # NOTE(review): this expects a single part, quoted-printable encoded
    # message - confirm that this holds for all mails fed to this function
    body = msg.get_payload().decode('quoted-printable').replace("\r", "")
    begin_marker = "(BEGIN IANA LAST CALL COMMENTS)"
    end_marker = "(END IANA LAST CALL COMMENTS)"

    # find() gives -1 for a missing marker, which must not end up in the
    # slice below as if it were a position
    b = body.find(begin_marker)
    if b == -1:
        b = 0
    else:
        b += len(begin_marker)

    e = body.find(end_marker)
    if e == -1:
        e = len(body)

    comment = body[b:e].strip()

    # strip leading IESG:
    if comment.startswith("IESG:"):
        comment = comment[len("IESG:"):].lstrip()

    # strip ending Thanks, followed by signature
    m = re.compile(r"^Thanks,\n\n", re.MULTILINE).search(comment)
    if m:
        comment = comment[:m.start()].rstrip()

    return doc_name, review_time, by, comment
249+
250+
def add_review_comment(doc_name, review_time, by, comment):
    """Store an IANA review comment as an "iana_review" event.

    If the document named doc_name already has an "iana_review" event
    at review_time, that event is updated, otherwise a new one is
    created. In both cases the event is attributed to by and gets
    comment as its description."""
    try:
        review_event = DocEvent.objects.get(doc__name=doc_name, time=review_time, type="iana_review")
    except DocEvent.DoesNotExist:
        reviewed_doc = Document.objects.get(name=doc_name)
        review_event = DocEvent(doc=reviewed_doc, time=review_time, type="iana_review")

    review_event.by = by
    review_event.desc = comment

    review_event.save()

0 commit comments

Comments
 (0)