|
8 | 8 |
|
9 | 9 | from django.conf import settings |
10 | 10 |
|
| 11 | +import debug # pyflakes:ignore |
| 12 | + |
11 | 13 | from ietf.doc.models import ( Document, DocAlias, State, StateType, DocEvent, DocRelationshipName, |
12 | 14 | DocTagName, DocTypeName, RelatedDocument ) |
13 | 15 | from ietf.doc.expire import move_draft_files_to_archive |
@@ -45,68 +47,73 @@ def parse_queue(response): |
45 | 47 | stream = None |
46 | 48 |
|
47 | 49 | for event, node in events: |
48 | | - if event == pulldom.START_ELEMENT and node.tagName == "entry": |
49 | | - events.expandNode(node) |
50 | | - node.normalize() |
51 | | - draft_name = get_child_text(node, "draft").strip() |
52 | | - draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name) |
53 | | - date_received = get_child_text(node, "date-received") |
| 50 | + try: |
| 51 | + if event == pulldom.START_ELEMENT and node.tagName == "entry": |
| 52 | + events.expandNode(node) |
| 53 | + node.normalize() |
| 54 | + draft_name = get_child_text(node, "draft").strip() |
| 55 | + draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name) |
| 56 | + date_received = get_child_text(node, "date-received") |
| 57 | + |
| 58 | + state = "" |
| 59 | + tags = [] |
| 60 | + missref_generation = "" |
| 61 | + for child in node.childNodes: |
| 62 | + if child.nodeType == Node.ELEMENT_NODE and child.localName == "state": |
| 63 | + state = child.firstChild.data |
| 64 | + # state has some extra annotations encoded, parse |
| 65 | + # them out |
| 66 | + if '*R' in state: |
| 67 | + tags.append("ref") |
| 68 | + state = state.replace("*R", "") |
| 69 | + if '*A' in state: |
| 70 | + tags.append("iana") |
| 71 | + state = state.replace("*A", "") |
| 72 | + m = re.search(r"\(([0-9]+)G\)", state) |
| 73 | + if m: |
| 74 | + missref_generation = m.group(1) |
| 75 | + state = state.replace("(%sG)" % missref_generation, "") |
| 76 | + |
| 77 | + # AUTH48 link |
| 78 | + auth48 = "" |
| 79 | + for child in node.childNodes: |
| 80 | + if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url": |
| 81 | + auth48 = child.firstChild.data |
| 82 | + |
| 83 | + # cluster link (if it ever gets implemented) |
| 84 | + cluster = "" |
| 85 | + for child in node.childNodes: |
| 86 | + if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url": |
| 87 | + cluster = child.firstChild.data |
| 88 | + |
| 89 | + refs = [] |
| 90 | + for child in node.childNodes: |
| 91 | + if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef": |
| 92 | + ref_name = get_child_text(child, "ref-name") |
| 93 | + ref_state = get_child_text(child, "ref-state") |
| 94 | + in_queue = ref_state.startswith("IN-QUEUE") |
| 95 | + refs.append((ref_name, ref_state, in_queue)) |
| 96 | + |
| 97 | + drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs)) |
| 98 | + |
| 99 | + elif event == pulldom.START_ELEMENT and node.tagName == "section": |
| 100 | + name = node.getAttribute('name') |
| 101 | + if name.startswith("IETF"): |
| 102 | + stream = "ietf" |
| 103 | + elif name.startswith("IAB"): |
| 104 | + stream = "iab" |
| 105 | + elif name.startswith("IRTF"): |
| 106 | + stream = "irtf" |
| 107 | + elif name.startswith("INDEPENDENT"): |
| 108 | + stream = "ise" |
| 109 | + else: |
| 110 | + stream = None |
| 111 | + warnings.append("unrecognized section " + name) |
| 112 | + except Exception as e: |
| 113 | + log("Exception when processing an RFC queue entry: %s" % e) |
| 114 | + log("node: %s" % node) |
| 115 | + raise |
54 | 116 |
|
55 | | - state = "" |
56 | | - tags = [] |
57 | | - missref_generation = "" |
58 | | - for child in node.childNodes: |
59 | | - if child.nodeType == Node.ELEMENT_NODE and child.localName == "state": |
60 | | - state = child.firstChild.data |
61 | | - # state has some extra annotations encoded, parse |
62 | | - # them out |
63 | | - if '*R' in state: |
64 | | - tags.append("ref") |
65 | | - state = state.replace("*R", "") |
66 | | - if '*A' in state: |
67 | | - tags.append("iana") |
68 | | - state = state.replace("*A", "") |
69 | | - m = re.search(r"\(([0-9]+)G\)", state) |
70 | | - if m: |
71 | | - missref_generation = m.group(1) |
72 | | - state = state.replace("(%sG)" % missref_generation, "") |
73 | | - |
74 | | - # AUTH48 link |
75 | | - auth48 = "" |
76 | | - for child in node.childNodes: |
77 | | - if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url": |
78 | | - auth48 = child.firstChild.data |
79 | | - |
80 | | - # cluster link (if it ever gets implemented) |
81 | | - cluster = "" |
82 | | - for child in node.childNodes: |
83 | | - if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url": |
84 | | - cluster = child.firstChild.data |
85 | | - |
86 | | - refs = [] |
87 | | - for child in node.childNodes: |
88 | | - if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef": |
89 | | - ref_name = get_child_text(child, "ref-name") |
90 | | - ref_state = get_child_text(child, "ref-state") |
91 | | - in_queue = ref_state.startswith("IN-QUEUE") |
92 | | - refs.append((ref_name, ref_state, in_queue)) |
93 | | - |
94 | | - drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs)) |
95 | | - |
96 | | - elif event == pulldom.START_ELEMENT and node.tagName == "section": |
97 | | - name = node.getAttribute('name') |
98 | | - if name.startswith("IETF"): |
99 | | - stream = "ietf" |
100 | | - elif name.startswith("IAB"): |
101 | | - stream = "iab" |
102 | | - elif name.startswith("IRTF"): |
103 | | - stream = "irtf" |
104 | | - elif name.startswith("INDEPENDENT"): |
105 | | - stream = "ise" |
106 | | - else: |
107 | | - stream = None |
108 | | - warnings.append("unrecognized section " + name) |
109 | | - |
110 | 117 | return drafts, warnings |
111 | 118 |
|
112 | 119 | def update_drafts_from_queue(drafts): |
@@ -243,67 +250,71 @@ def extract_doc_list(parentNode, tagName): |
243 | 250 | data = [] |
244 | 251 | events = pulldom.parse(response) |
245 | 252 | for event, node in events: |
246 | | - if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]: |
247 | | - events.expandNode(node) |
248 | | - node.normalize() |
249 | | - bcpid = normalize_std_name(get_child_text(node, "doc-id")) |
250 | | - doclist = extract_doc_list(node, "is-also") |
251 | | - for docid in doclist: |
252 | | - if docid in also_list: |
253 | | - also_list[docid].append(bcpid) |
| 253 | + try: |
| 254 | + if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]: |
| 255 | + events.expandNode(node) |
| 256 | + node.normalize() |
| 257 | + bcpid = normalize_std_name(get_child_text(node, "doc-id")) |
| 258 | + doclist = extract_doc_list(node, "is-also") |
| 259 | + for docid in doclist: |
| 260 | + if docid in also_list: |
| 261 | + also_list[docid].append(bcpid) |
| 262 | + else: |
| 263 | + also_list[docid] = [bcpid] |
| 264 | + |
| 265 | + elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry": |
| 266 | + events.expandNode(node) |
| 267 | + node.normalize() |
| 268 | + rfc_number = int(get_child_text(node, "doc-id")[3:]) |
| 269 | + title = get_child_text(node, "title") |
| 270 | + |
| 271 | + authors = [] |
| 272 | + for author in node.getElementsByTagName("author"): |
| 273 | + authors.append(get_child_text(author, "name")) |
| 274 | + |
| 275 | + d = node.getElementsByTagName("date")[0] |
| 276 | + year = int(get_child_text(d, "year")) |
| 277 | + month = get_child_text(d, "month") |
| 278 | + month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1 |
| 279 | + rfc_published_date = datetime.date(year, month, 1) |
| 280 | + |
| 281 | + current_status = get_child_text(node, "current-status").title() |
| 282 | + |
| 283 | + updates = extract_doc_list(node, "updates") |
| 284 | + updated_by = extract_doc_list(node, "updated-by") |
| 285 | + obsoletes = extract_doc_list(node, "obsoletes") |
| 286 | + obsoleted_by = extract_doc_list(node, "obsoleted-by") |
| 287 | + stream = get_child_text(node, "stream") |
| 288 | + wg = get_child_text(node, "wg_acronym") |
| 289 | + if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15): |
| 290 | + wg = None |
| 291 | + |
| 292 | + l = [] |
| 293 | + pages = "" |
| 294 | + for fmt in node.getElementsByTagName("format"): |
| 295 | + l.append(get_child_text(fmt, "file-format")) |
| 296 | + if get_child_text(fmt, "file-format") == "ASCII": |
| 297 | + pages = get_child_text(fmt, "page-count") |
| 298 | + file_formats = (",".join(l)).lower() |
| 299 | + |
| 300 | + abstract = "" |
| 301 | + for abstract in node.getElementsByTagName("abstract"): |
| 302 | + abstract = get_child_text(abstract, "p") |
| 303 | + |
| 304 | + draft = get_child_text(node, "draft") |
| 305 | + if draft and re.search("-\d\d$", draft): |
| 306 | + draft = draft[0:-3] |
| 307 | + |
| 308 | + if len(node.getElementsByTagName("errata-url")) > 0: |
| 309 | + has_errata = 1 |
254 | 310 | else: |
255 | | - also_list[docid] = [bcpid] |
256 | | - |
257 | | - elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry": |
258 | | - events.expandNode(node) |
259 | | - node.normalize() |
260 | | - rfc_number = int(get_child_text(node, "doc-id")[3:]) |
261 | | - title = get_child_text(node, "title") |
262 | | - |
263 | | - authors = [] |
264 | | - for author in node.getElementsByTagName("author"): |
265 | | - authors.append(get_child_text(author, "name")) |
266 | | - |
267 | | - d = node.getElementsByTagName("date")[0] |
268 | | - year = int(get_child_text(d, "year")) |
269 | | - month = get_child_text(d, "month") |
270 | | - month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1 |
271 | | - rfc_published_date = datetime.date(year, month, 1) |
272 | | - |
273 | | - current_status = get_child_text(node, "current-status").title() |
274 | | - |
275 | | - updates = extract_doc_list(node, "updates") |
276 | | - updated_by = extract_doc_list(node, "updated-by") |
277 | | - obsoletes = extract_doc_list(node, "obsoletes") |
278 | | - obsoleted_by = extract_doc_list(node, "obsoleted-by") |
279 | | - stream = get_child_text(node, "stream") |
280 | | - wg = get_child_text(node, "wg_acronym") |
281 | | - if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15): |
282 | | - wg = None |
283 | | - |
284 | | - l = [] |
285 | | - pages = "" |
286 | | - for fmt in node.getElementsByTagName("format"): |
287 | | - l.append(get_child_text(fmt, "file-format")) |
288 | | - if get_child_text(fmt, "file-format") == "ASCII": |
289 | | - pages = get_child_text(fmt, "page-count") |
290 | | - file_formats = (",".join(l)).lower() |
291 | | - |
292 | | - abstract = "" |
293 | | - for abstract in node.getElementsByTagName("abstract"): |
294 | | - abstract = get_child_text(abstract, "p") |
295 | | - |
296 | | - draft = get_child_text(node, "draft") |
297 | | - if draft and re.search("-\d\d$", draft): |
298 | | - draft = draft[0:-3] |
299 | | - |
300 | | - if len(node.getElementsByTagName("errata-url")) > 0: |
301 | | - has_errata = 1 |
302 | | - else: |
303 | | - has_errata = 0 |
304 | | - |
305 | | - data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract)) |
| 311 | + has_errata = 0 |
306 | 312 |
|
| 313 | + data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract)) |
| 314 | + except Exception as e: |
| 315 | + log("Exception when processing an RFC index entry: %s" % e) |
| 316 | + log("node: %s" % node) |
| 317 | + raise |
307 | 318 | for d in data: |
308 | 319 | k = "RFC%04d" % d[0] |
309 | 320 | if k in also_list: |
|
0 commit comments