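Added exception logging for ietf/bin/rfc-editor-queue-updates (and for RFC index parsing) in ietf/sync/rfceditor.py: an exception raised while processing a queue or index entry is now logged together with the offending node and then re-raised. Changed the logging in the ietf/bin/rfc-editor-* scripts to use ietf.utils.log.log() instead of syslog. Reordered some imports.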

levkowetz · levkowetz · commit d6565f045013 · 2018-07-12T12:52:58.000Z
- Legacy-Id: 15318
diff --git a/ietf/bin/rfc-editor-index-updates b/ietf/bin/rfc-editor-index-updates
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 
 import os, sys, datetime
-import syslog
 import traceback
 
 # boilerplate
@@ -13,15 +12,20 @@ virtualenv_activation = os.path.join(basedir, "env", "bin", "activate_this.py")
 if os.path.exists(virtualenv_activation):
     execfile(virtualenv_activation, dict(__file__=virtualenv_activation))
 
-syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
-
 import django
 django.setup()
 
 from django.conf import settings
 from optparse import OptionParser
 from django.core.mail import mail_admins
 
+from ietf.doc.utils import rebuild_reference_relations
+from ietf.utils.log import log
+from ietf.utils.pipe import pipe
+
+import ietf.sync.rfceditor
+
+
 parser = OptionParser()
 parser.add_option("-d", dest="skip_date",
                   help="To speed up processing skip RFCs published before this date (default is one year ago)", metavar="YYYY-MM-DD")
@@ -32,17 +36,13 @@ skip_date = datetime.date.today() - datetime.timedelta(days=365)
 if options.skip_date:
     skip_date = datetime.datetime.strptime(options.skip_date, "%Y-%m-%d").date()
 
-from ietf.utils.pipe import pipe
-from ietf.doc.utils import rebuild_reference_relations
-import ietf.sync.rfceditor
-
-syslog.syslog("Updating document metadata from RFC index from %s" % settings.RFC_EDITOR_INDEX_URL)
+log("Updating document metadata from RFC index from %s" % settings.RFC_EDITOR_INDEX_URL)
 
 response = ietf.sync.rfceditor.fetch_index_xml(settings.RFC_EDITOR_INDEX_URL)
 data = ietf.sync.rfceditor.parse_index(response)
 
 if len(data) < ietf.sync.rfceditor.MIN_INDEX_RESULTS:
-    syslog.syslog("Not enough results, only %s" % len(data))
+    log("Not enough results, only %s" % len(data))
     sys.exit(1)
 
 new_rfcs = []
@@ -51,8 +51,7 @@ for changes, doc, rfc_published in ietf.sync.rfceditor.update_docs_from_rfc_inde
         new_rfcs.append(doc)
 
     for c in changes:
-        syslog.syslog("%s: %s" % (doc.name, c))
-        print "%s: %s" % (doc.name, c)
+        log("%s: %s" % (doc.name, c))
 
 sys.exit(0)
 
@@ -67,12 +66,12 @@ if newpid == 0:
         pipe("%s -a %s %s" % (settings.RSYNC_BINARY,settings.RFC_TEXT_RSYNC_SOURCE,settings.RFC_PATH))
         for rfc in new_rfcs:
             rebuild_reference_relations(rfc)
-            syslog.syslog("Updated references for %s"%rfc.canonical_name())
+            log("Updated references for %s"%rfc.canonical_name())
     except:
         subject = "Exception in updating references for new rfcs: %s : %s" % (sys.exc_info()[0],sys.exc_info()[1])
         msg = "%s\n%s\n----\n%s"%(sys.exc_info()[0],sys.exc_info()[1],traceback.format_tb(sys.exc_info()[2])) 
         mail_admins(subject,msg,fail_silently=True)
-        syslog.syslog(subject)
+        log(subject)
     os._exit(0)
 else:
     sys.exit(0)
diff --git a/ietf/bin/rfc-editor-queue-updates b/ietf/bin/rfc-editor-queue-updates
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 
-import os, sys, re, json, datetime
-import syslog
+import os, sys
 
 # boilerplate
 basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
@@ -12,28 +11,28 @@ virtualenv_activation = os.path.join(basedir, "env", "bin", "activate_this.py")
 if os.path.exists(virtualenv_activation):
     execfile(virtualenv_activation, dict(__file__=virtualenv_activation))
 
-syslog.openlog(os.path.basename(__file__), syslog.LOG_PID, syslog.LOG_USER)
-
 import django
 django.setup()
 
 from django.conf import settings
-from ietf.sync.rfceditor import *
 
-syslog.syslog("Updating RFC Editor queue states from %s" % settings.RFC_EDITOR_QUEUE_URL)
+from ietf.sync.rfceditor import fetch_queue_xml, parse_queue, MIN_QUEUE_RESULTS, update_drafts_from_queue
+from ietf.utils.log import log
+
+log("Updating RFC Editor queue states from %s" % settings.RFC_EDITOR_QUEUE_URL)
 
 response = fetch_queue_xml(settings.RFC_EDITOR_QUEUE_URL)
 drafts, warnings = parse_queue(response)
 for w in warnings:
-    syslog.syslog(u"WARNING: %s" % w)
+    log(u"Warning: %s" % w)
 
 if len(drafts) < MIN_QUEUE_RESULTS:
-    syslog.syslog("Not enough results, only %s" % len(drafts))
+    log("Not enough results, only %s" % len(drafts))
     sys.exit(1)
 
 changed, warnings = update_drafts_from_queue(drafts)
 for w in warnings:
-    syslog.syslog(u"WARNING: %s" % w)
+    log(u"Warning: %s" % w)
 
 for c in changed:
-    syslog.syslog(u"Updated %s" % c)
+    log(u"Updated %s" % c)
diff --git a/ietf/sync/rfceditor.py b/ietf/sync/rfceditor.py
@@ -8,6 +8,8 @@
 
 from django.conf import settings
 
+import debug                            # pyflakes:ignore
+
 from ietf.doc.models import ( Document, DocAlias, State, StateType, DocEvent, DocRelationshipName,
     DocTagName, DocTypeName, RelatedDocument )
 from ietf.doc.expire import move_draft_files_to_archive
@@ -45,68 +47,73 @@ def parse_queue(response):
     stream = None
 
     for event, node in events:
-        if event == pulldom.START_ELEMENT and node.tagName == "entry":
-            events.expandNode(node)
-            node.normalize()
-            draft_name = get_child_text(node, "draft").strip()
-            draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name)
-            date_received = get_child_text(node, "date-received")
+        try:
+            if event == pulldom.START_ELEMENT and node.tagName == "entry":
+                events.expandNode(node)
+                node.normalize()
+                draft_name = get_child_text(node, "draft").strip()
+                draft_name = re.sub("(-\d\d)?(.txt){1,2}$", "", draft_name)
+                date_received = get_child_text(node, "date-received")
+
+                state = ""
+                tags = []
+                missref_generation = ""
+                for child in node.childNodes:
+                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "state":
+                        state = child.firstChild.data
+                        # state has some extra annotations encoded, parse
+                        # them out
+                        if '*R' in state:
+                            tags.append("ref")
+                            state = state.replace("*R", "")
+                        if '*A' in state:
+                            tags.append("iana")
+                            state = state.replace("*A", "")
+                        m = re.search(r"\(([0-9]+)G\)", state)
+                        if m:
+                            missref_generation = m.group(1)
+                            state = state.replace("(%sG)" % missref_generation, "")
+
+                # AUTH48 link
+                auth48 = ""
+                for child in node.childNodes:
+                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url":
+                        auth48 = child.firstChild.data
+
+                # cluster link (if it ever gets implemented)
+                cluster = ""
+                for child in node.childNodes:
+                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url":
+                        cluster = child.firstChild.data
+
+                refs = []
+                for child in node.childNodes:
+                    if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef":
+                        ref_name = get_child_text(child, "ref-name")
+                        ref_state = get_child_text(child, "ref-state")
+                        in_queue = ref_state.startswith("IN-QUEUE")
+                        refs.append((ref_name, ref_state, in_queue))
+
+                drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs))
+
+            elif event == pulldom.START_ELEMENT and node.tagName == "section":
+                name = node.getAttribute('name')
+                if name.startswith("IETF"):
+                    stream = "ietf"
+                elif name.startswith("IAB"):
+                    stream = "iab"
+                elif name.startswith("IRTF"):
+                    stream = "irtf"
+                elif name.startswith("INDEPENDENT"):
+                    stream = "ise"
+                else:
+                    stream = None
+                    warnings.append("unrecognized section " + name)
+        except Exception as e:
+            log("Exception when processing an RFC queue entry: %s" % e)
+            log("node: %s" % node)
+            raise
             
-            state = ""
-            tags = []
-            missref_generation = ""
-            for child in node.childNodes:
-                if child.nodeType == Node.ELEMENT_NODE and child.localName == "state":
-                    state = child.firstChild.data
-                    # state has some extra annotations encoded, parse
-                    # them out
-                    if '*R' in state:
-                        tags.append("ref")
-                        state = state.replace("*R", "")
-                    if '*A' in state:
-                        tags.append("iana")
-                        state = state.replace("*A", "")
-                    m = re.search(r"\(([0-9]+)G\)", state)
-                    if m:
-                        missref_generation = m.group(1)
-                        state = state.replace("(%sG)" % missref_generation, "")
-
-            # AUTH48 link
-            auth48 = ""
-            for child in node.childNodes:
-                if child.nodeType == Node.ELEMENT_NODE and child.localName == "auth48-url":
-                    auth48 = child.firstChild.data
-
-            # cluster link (if it ever gets implemented)
-            cluster = ""
-            for child in node.childNodes:
-                if child.nodeType == Node.ELEMENT_NODE and child.localName == "cluster-url":
-                    cluster = child.firstChild.data
-
-            refs = []
-            for child in node.childNodes:
-                if child.nodeType == Node.ELEMENT_NODE and child.localName == "normRef":
-                    ref_name = get_child_text(child, "ref-name")
-                    ref_state = get_child_text(child, "ref-state")
-                    in_queue = ref_state.startswith("IN-QUEUE")
-                    refs.append((ref_name, ref_state, in_queue))
-
-            drafts.append((draft_name, date_received, state, tags, missref_generation, stream, auth48, cluster, refs))
-        
-        elif event == pulldom.START_ELEMENT and node.tagName == "section":
-            name = node.getAttribute('name')
-            if name.startswith("IETF"):
-                stream = "ietf"
-            elif name.startswith("IAB"):
-                stream = "iab"
-            elif name.startswith("IRTF"):
-                stream = "irtf"
-            elif name.startswith("INDEPENDENT"):
-                stream = "ise"
-            else:
-                stream = None
-                warnings.append("unrecognized section " + name)
-
     return drafts, warnings
 
 def update_drafts_from_queue(drafts):
@@ -243,67 +250,71 @@ def extract_doc_list(parentNode, tagName):
     data = []
     events = pulldom.parse(response)
     for event, node in events:
-        if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
-            events.expandNode(node)
-            node.normalize()
-            bcpid = normalize_std_name(get_child_text(node, "doc-id"))
-            doclist = extract_doc_list(node, "is-also")
-            for docid in doclist:
-                if docid in also_list:
-                    also_list[docid].append(bcpid)
+        try:
+            if event == pulldom.START_ELEMENT and node.tagName in ["bcp-entry", "fyi-entry", "std-entry"]:
+                events.expandNode(node)
+                node.normalize()
+                bcpid = normalize_std_name(get_child_text(node, "doc-id"))
+                doclist = extract_doc_list(node, "is-also")
+                for docid in doclist:
+                    if docid in also_list:
+                        also_list[docid].append(bcpid)
+                    else:
+                        also_list[docid] = [bcpid]
+
+            elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
+                events.expandNode(node)
+                node.normalize()
+                rfc_number = int(get_child_text(node, "doc-id")[3:])
+                title = get_child_text(node, "title")
+
+                authors = []
+                for author in node.getElementsByTagName("author"):
+                    authors.append(get_child_text(author, "name"))
+
+                d = node.getElementsByTagName("date")[0]
+                year = int(get_child_text(d, "year"))
+                month = get_child_text(d, "month")
+                month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1
+                rfc_published_date = datetime.date(year, month, 1)
+
+                current_status = get_child_text(node, "current-status").title()
+
+                updates = extract_doc_list(node, "updates") 
+                updated_by = extract_doc_list(node, "updated-by")
+                obsoletes = extract_doc_list(node, "obsoletes") 
+                obsoleted_by = extract_doc_list(node, "obsoleted-by")
+                stream = get_child_text(node, "stream")
+                wg = get_child_text(node, "wg_acronym")
+                if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
+                    wg = None
+
+                l = []
+                pages = ""
+                for fmt in node.getElementsByTagName("format"):
+                    l.append(get_child_text(fmt, "file-format"))
+                    if get_child_text(fmt, "file-format") == "ASCII":
+                        pages = get_child_text(fmt, "page-count")
+                file_formats = (",".join(l)).lower()
+
+                abstract = ""
+                for abstract in node.getElementsByTagName("abstract"):
+                    abstract = get_child_text(abstract, "p")
+
+                draft = get_child_text(node, "draft")
+                if draft and re.search("-\d\d$", draft):
+                    draft = draft[0:-3]
+
+                if len(node.getElementsByTagName("errata-url")) > 0:
+                    has_errata = 1
                 else:
-                    also_list[docid] = [bcpid]
-
-        elif event == pulldom.START_ELEMENT and node.tagName == "rfc-entry":
-            events.expandNode(node)
-            node.normalize()
-            rfc_number = int(get_child_text(node, "doc-id")[3:])
-            title = get_child_text(node, "title")
-
-            authors = []
-            for author in node.getElementsByTagName("author"):
-                authors.append(get_child_text(author, "name"))
-
-            d = node.getElementsByTagName("date")[0]
-            year = int(get_child_text(d, "year"))
-            month = get_child_text(d, "month")
-            month = ["January","February","March","April","May","June","July","August","September","October","November","December"].index(month)+1
-            rfc_published_date = datetime.date(year, month, 1)
-
-            current_status = get_child_text(node, "current-status").title()
-
-            updates = extract_doc_list(node, "updates") 
-            updated_by = extract_doc_list(node, "updated-by")
-            obsoletes = extract_doc_list(node, "obsoletes") 
-            obsoleted_by = extract_doc_list(node, "obsoleted-by")
-            stream = get_child_text(node, "stream")
-            wg = get_child_text(node, "wg_acronym")
-            if wg and ((wg == "NON WORKING GROUP") or len(wg) > 15):
-                wg = None
-           
-            l = []
-            pages = ""
-            for fmt in node.getElementsByTagName("format"):
-                l.append(get_child_text(fmt, "file-format"))
-                if get_child_text(fmt, "file-format") == "ASCII":
-                    pages = get_child_text(fmt, "page-count")
-            file_formats = (",".join(l)).lower()
-
-            abstract = ""
-            for abstract in node.getElementsByTagName("abstract"):
-                abstract = get_child_text(abstract, "p")
-
-            draft = get_child_text(node, "draft")
-            if draft and re.search("-\d\d$", draft):
-                draft = draft[0:-3]
-
-            if len(node.getElementsByTagName("errata-url")) > 0:
-                has_errata = 1
-            else:
-                has_errata = 0
-
-            data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract))
+                    has_errata = 0
 
+                data.append((rfc_number,title,authors,rfc_published_date,current_status,updates,updated_by,obsoletes,obsoleted_by,[],draft,has_errata,stream,wg,file_formats,pages,abstract))
+        except Exception as e:
+            log("Exception when processing an RFC index entry: %s" % e)
+            log("node: %s" % node)
+            raise
     for d in data:
         k = "RFC%04d" % d[0]
         if k in also_list: