Skip to content

Commit d6565f0

Browse files
committed
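Added exception logging around the per-entry parsing of the RFC Editor queue and RFC index in ietf/sync/rfceditor.py, which is used by ietf/bin/rfc-editor-queue-updates and ietf/bin/rfc-editor-index-updates; the exception and the offending node are logged and the exception is then re-raised. Changed the logging in the ietf/bin/rfc-editor-* scripts to use ietf.utils.log.log() instead of calling syslog directly. Reordered some imports.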
- Legacy-Id: 15318
1 parent 85c24b8 commit d6565f0

3 files changed

Lines changed: 152 additions & 143 deletions

File tree

ietf/bin/rfc-editor-index-updates

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/usr/bin/env python
22

33
import os, sys, datetime
4-
import syslog
54
import traceback
65

76
# boilerplate
@@ -13,15 +12,20 @@ virtualenv_activation = os.path.join(basedir, "env", "bin", "activate_this.py")
1312
if os.path.exists(virtualenv_activation):
1413
execfile(virtualenv_activation, dict(__file__=virtualenv_activation))
1514

16-
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
17-
1815
import django
1916
django.setup()
2017

2118
from django.conf import settings
2219
from optparse import OptionParser
2320
from django.core.mail import mail_admins
2421

22+
from ietf.doc.utils import rebuild_reference_relations
23+
from ietf.utils.log import log
24+
from ietf.utils.pipe import pipe
25+
26+
import ietf.sync.rfceditor
27+
28+
2529
parser = OptionParser()
2630
parser.add_option("-d", dest="skip_date",
2731
help="To speed up processing skip RFCs published before this date (default is one year ago)", metavar="YYYY-MM-DD")
@@ -32,17 +36,13 @@ skip_date = datetime.date.today() - datetime.timedelta(days=365)
3236
if options.skip_date:
3337
skip_date = datetime.datetime.strptime(options.skip_date, "%Y-%m-%d").date()
3438

35-
from ietf.utils.pipe import pipe
36-
from ietf.doc.utils import rebuild_reference_relations
37-
import ietf.sync.rfceditor
38-
39-
syslog.syslog("Updating document metadata from RFC index from %s" % settings.RFC_EDITOR_INDEX_URL)
39+
log("Updating document metadata from RFC index from %s" % settings.RFC_EDITOR_INDEX_URL)
4040

4141
response = ietf.sync.rfceditor.fetch_index_xml(settings.RFC_EDITOR_INDEX_URL)
4242
data = ietf.sync.rfceditor.parse_index(response)
4343

4444
if len(data) < ietf.sync.rfceditor.MIN_INDEX_RESULTS:
45-
syslog.syslog("Not enough results, only %s" % len(data))
45+
log("Not enough results, only %s" % len(data))
4646
sys.exit(1)
4747

4848
new_rfcs = []
@@ -51,8 +51,7 @@ for changes, doc, rfc_published in ietf.sync.rfceditor.update_docs_from_rfc_inde
5151
new_rfcs.append(doc)
5252

5353
for c in changes:
54-
syslog.syslog("%s: %s" % (doc.name, c))
55-
print "%s: %s" % (doc.name, c)
54+
log("%s: %s" % (doc.name, c))
5655

5756
sys.exit(0)
5857

@@ -67,12 +66,12 @@ if newpid == 0:
6766
pipe("%s -a %s %s" % (settings.RSYNC_BINARY,settings.RFC_TEXT_RSYNC_SOURCE,settings.RFC_PATH))
6867
for rfc in new_rfcs:
6968
rebuild_reference_relations(rfc)
70-
syslog.syslog("Updated references for %s"%rfc.canonical_name())
69+
log("Updated references for %s"%rfc.canonical_name())
7170
except:
7271
subject = "Exception in updating references for new rfcs: %s : %s" % (sys.exc_info()[0],sys.exc_info()[1])
7372
msg = "%s\n%s\n----\n%s"%(sys.exc_info()[0],sys.exc_info()[1],traceback.format_tb(sys.exc_info()[2]))
7473
mail_admins(subject,msg,fail_silently=True)
75-
syslog.syslog(subject)
74+
log(subject)
7675
os._exit(0)
7776
else:
7877
sys.exit(0)

ietf/bin/rfc-editor-queue-updates

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/usr/bin/env python
22

3-
import os, sys, re, json, datetime
4-
import syslog
3+
import os, sys
54

65
# boilerplate
76
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
@@ -12,28 +11,28 @@ virtualenv_activation = os.path.join(basedir, "env", "bin", "activate_this.py")
1211
if os.path.exists(virtualenv_activation):
1312
execfile(virtualenv_activation, dict(__file__=virtualenv_activation))
1413

15-
syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
16-
1714
import django
1815
django.setup()
1916

2017
from django.conf import settings
21-
from ietf.sync.rfceditor import *
2218

23-
syslog.syslog("Updating RFC Editor queue states from %s" % settings.RFC_EDITOR_QUEUE_URL)
19+
from ietf.sync.rfceditor import fetch_queue_xml, parse_queue, MIN_QUEUE_RESULTS, update_drafts_from_queue
20+
from ietf.utils.log import log
21+
22+
log("Updating RFC Editor queue states from %s" % settings.RFC_EDITOR_QUEUE_URL)
2423

2524
response = fetch_queue_xml(settings.RFC_EDITOR_QUEUE_URL)
2625
drafts, warnings = parse_queue(response)
2726
for w in warnings:
28-
syslog.syslog(u"WARNING: %s" % w)
27+
log(u"Warning: %s" % w)
2928

3029
if len(drafts) < MIN_QUEUE_RESULTS:
31-
syslog.syslog("Not enough results, only %s" % len(drafts))
30+
log("Not enough results, only %s" % len(drafts))
3231
sys.exit(1)
3332

3433
changed, warnings = update_drafts_from_queue(drafts)
3534
for w in warnings:
36-
syslog.syslog(u"WARNING: %s" % w)
35+
log(u"Warning: %s" % w)
3736

3837
for c in changed:
39-
syslog.syslog(u"Updated %s" % c)
38+
log(u"Updated %s" % c)

ietf/sync/rfceditor.py

Lines changed: 131 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
from django.conf import settings
1010

11+
import debug # pyflakes:ignore
12+
1113
from ietf.doc.models import ( Document, DocAlias, State, StateType, DocEvent, DocRelationshipName,
1214
DocTagName, DocTypeName, RelatedDocument )
1315
from ietf.doc.expire import move_draft_files_to_archive
@@ -45,68 +47,73 @@ def parse_queue(response):
4547
stream = None
4648

4749
for event, node in events:
48-
if event == pulldom.START_ELEMENT and node.tagName == "entry":
49-
events.expandNode(node)
50-
node.normalize()
51-
draft_name = get_child_text(node, "draft").strip()
52-
draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name)
53-
date_received = get_child_text(node, "date-received")
50+
try:
51+
if event == pulldom.START_ELEMENT and node.tagName == "entry":
52+
events.expandNode(node)
53+
node.normalize()
54+
draft_name = get_child_text(node, "draft").strip()
55+
draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name)
56+
date_received = get_child_text(node, "date-received")
57+
58+
state = ""
59+
tags = []
60+
missref_generation = ""
61+
for child in node.childNodes:
62+
if child.nodeType == Node.ELEMENT_NODE and child.localName == "state":
63+
state = child.firstChild.data
64+
# state has some extra annotations encoded, parse
65+
# them out
66+
if '*R' in state:
67+
tags.append("ref")
68+
state = state.replace("*R", "")
69+
if '*A' in state:
70+
tags.append("iana")
71+
state = state.replace("*A", "")
72+
m = re.search(r"\(([0-9]+)G\)", state)
73+
if m:
74+
missref_generation = m.group(1)
75+
state = state.replace("(%sG)" % missref_generation, "")
76+
77+
# AUTH48 link
78+
auth48 = ""
79+
for child in node.childNodes:
80+
if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url":
81+
auth48 = child.firstChild.data
82+
83+
# cluster link (if it ever gets implemented)
84+
cluster = ""
85+
for child in node.childNodes:
86+
if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url":
87+
cluster = child.firstChild.data
88+
89+
refs = []
90+
for child in node.childNodes:
91+
if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef":
92+
ref_name = get_child_text(child, "ref-name")
93+
ref_state = get_child_text(child, "ref-state")
94+
in_queue = ref_state.startswith("IN-QUEUE")
95+
refs.append((ref_name, ref_state, in_queue))
96+
97+
drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs))
98+
99+
elif event == pulldom.START_ELEMENT and node.tagName == "section":
100+
name = node.getAttribute('name')
101+
if name.startswith("IETF"):
102+
stream = "ietf"
103+
elif name.startswith("IAB"):
104+
stream = "iab"
105+
elif name.startswith("IRTF"):
106+
stream = "irtf"
107+
elif name.startswith("INDEPENDENT"):
108+
stream = "ise"
109+
else:
110+
stream = None
111+
warnings.append("unrecognized section " + name)
112+
except Exception as e:
113+
log("Exception when processing an RFC queue entry: %s" % e)
114+
log("node: %s" % node)
115+
raise
54116

55-
state = ""
56-
tags = []
57-
missref_generation = ""
58-
for child in node.childNodes:
59-
if child.nodeType == Node.ELEMENT_NODE and child.localName == "state":
60-
state = child.firstChild.data
61-
# state has some extra annotations encoded, parse
62-
# them out
63-
if '*R' in state:
64-
tags.append("ref")
65-
state = state.replace("*R", "")
66-
if '*A' in state:
67-
tags.append("iana")
68-
state = state.replace("*A", "")
69-
m = re.search(r"\(([0-9]+)G\)", state)
70-
if m:
71-
missref_generation = m.group(1)
72-
state = state.replace("(%sG)" % missref_generation, "")
73-
74-
# AUTH48 link
75-
auth48 = ""
76-
for child in node.childNodes:
77-
if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url":
78-
auth48 = child.firstChild.data
79-
80-
# cluster link (if it ever gets implemented)
81-
cluster = ""
82-
for child in node.childNodes:
83-
if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url":
84-
cluster = child.firstChild.data
85-
86-
refs = []
87-
for child in node.childNodes:
88-
if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef":
89-
ref_name = get_child_text(child, "ref-name")
90-
ref_state = get_child_text(child, "ref-state")
91-
in_queue = ref_state.startswith("IN-QUEUE")
92-
refs.append((ref_name, ref_state, in_queue))
93-
94-
drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs))
95-
96-
elif event == pulldom.START_ELEMENT and node.tagName == "section":
97-
name = node.getAttribute('name')
98-
if name.startswith("IETF"):
99-
stream = "ietf"
100-
elif name.startswith("IAB"):
101-
stream = "iab"
102-
elif name.startswith("IRTF"):
103-
stream = "irtf"
104-
elif name.startswith("INDEPENDENT"):
105-
stream = "ise"
106-
else:
107-
stream = None
108-
warnings.append("unrecognized section " + name)
109-
110117
return drafts, warnings
111118

112119
def update_drafts_from_queue(drafts):
@@ -243,67 +250,71 @@ def extract_doc_list(parentNode, tagName):
243250
data = []
244251
events = pulldom.parse(response)
245252
for event, node in events:
246-
if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
247-
events.expandNode(node)
248-
node.normalize()
249-
bcpid = normalize_std_name(get_child_text(node, "doc-id"))
250-
doclist = extract_doc_list(node, "is-also")
251-
for docid in doclist:
252-
if docid in also_list:
253-
also_list[docid].append(bcpid)
253+
try:
254+
if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
255+
events.expandNode(node)
256+
node.normalize()
257+
bcpid = normalize_std_name(get_child_text(node, "doc-id"))
258+
doclist = extract_doc_list(node, "is-also")
259+
for docid in doclist:
260+
if docid in also_list:
261+
also_list[docid].append(bcpid)
262+
else:
263+
also_list[docid] = [bcpid]
264+
265+
elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
266+
events.expandNode(node)
267+
node.normalize()
268+
rfc_number = int(get_child_text(node, "doc-id")[3:])
269+
title = get_child_text(node, "title")
270+
271+
authors = []
272+
for author in node.getElementsByTagName("author"):
273+
authors.append(get_child_text(author, "name"))
274+
275+
d = node.getElementsByTagName("date")[0]
276+
year = int(get_child_text(d, "year"))
277+
month = get_child_text(d, "month")
278+
month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1
279+
rfc_published_date = datetime.date(year, month, 1)
280+
281+
current_status = get_child_text(node, "current-status").title()
282+
283+
updates = extract_doc_list(node, "updates")
284+
updated_by = extract_doc_list(node, "updated-by")
285+
obsoletes = extract_doc_list(node, "obsoletes")
286+
obsoleted_by = extract_doc_list(node, "obsoleted-by")
287+
stream = get_child_text(node, "stream")
288+
wg = get_child_text(node, "wg_acronym")
289+
if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
290+
wg = None
291+
292+
l = []
293+
pages = ""
294+
for fmt in node.getElementsByTagName("format"):
295+
l.append(get_child_text(fmt, "file-format"))
296+
if get_child_text(fmt, "file-format") == "ASCII":
297+
pages = get_child_text(fmt, "page-count")
298+
file_formats = (",".join(l)).lower()
299+
300+
abstract = ""
301+
for abstract in node.getElementsByTagName("abstract"):
302+
abstract = get_child_text(abstract, "p")
303+
304+
draft = get_child_text(node, "draft")
305+
if draft and re.search("-\d\d$", draft):
306+
draft = draft[0:-3]
307+
308+
if len(node.getElementsByTagName("errata-url")) > 0:
309+
has_errata = 1
254310
else:
255-
also_list[docid] = [bcpid]
256-
257-
elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
258-
events.expandNode(node)
259-
node.normalize()
260-
rfc_number = int(get_child_text(node, "doc-id")[3:])
261-
title = get_child_text(node, "title")
262-
263-
authors = []
264-
for author in node.getElementsByTagName("author"):
265-
authors.append(get_child_text(author, "name"))
266-
267-
d = node.getElementsByTagName("date")[0]
268-
year = int(get_child_text(d, "year"))
269-
month = get_child_text(d, "month")
270-
month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1
271-
rfc_published_date = datetime.date(year, month, 1)
272-
273-
current_status = get_child_text(node, "current-status").title()
274-
275-
updates = extract_doc_list(node, "updates")
276-
updated_by = extract_doc_list(node, "updated-by")
277-
obsoletes = extract_doc_list(node, "obsoletes")
278-
obsoleted_by = extract_doc_list(node, "obsoleted-by")
279-
stream = get_child_text(node, "stream")
280-
wg = get_child_text(node, "wg_acronym")
281-
if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
282-
wg = None
283-
284-
l = []
285-
pages = ""
286-
for fmt in node.getElementsByTagName("format"):
287-
l.append(get_child_text(fmt, "file-format"))
288-
if get_child_text(fmt, "file-format") == "ASCII":
289-
pages = get_child_text(fmt, "page-count")
290-
file_formats = (",".join(l)).lower()
291-
292-
abstract = ""
293-
for abstract in node.getElementsByTagName("abstract"):
294-
abstract = get_child_text(abstract, "p")
295-
296-
draft = get_child_text(node, "draft")
297-
if draft and re.search("-\d\d$", draft):
298-
draft = draft[0:-3]
299-
300-
if len(node.getElementsByTagName("errata-url")) > 0:
301-
has_errata = 1
302-
else:
303-
has_errata = 0
304-
305-
data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract))
311+
has_errata = 0
306312

313+
data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract))
314+
except Exception as e:
315+
log("Exception when processing an RFC index entry: %s" % e)
316+
log("node: %s" % node)
317+
raise
307318
for d in data:
308319
k = "RFC%04d" % d[0]
309320
if k in also_list:

0 commit comments

Comments
 (0)