datatracker/ietf/utils/xmldraft.py at dev/7.43.1.dev0 · https-github-com-bit/datatracker · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Copyright The IETF Trust 2021, All Rights Reserved
# -*- coding: utf-8 -*-
import os
import xml2rfc

import debug  # pyflakes: ignore

from contextlib import ExitStack

from django.conf import settings

from .draft import Draft


class XMLDraft(Draft):
    """Draft from XML source

    Currently just a holding place for get_refs() for an XML file. Can eventually expand
    to implement the other public methods of Draft as need arises.
    """
    def __init__(self, xml_file):
        """Initialize XMLDraft instance

        :parameter xml_file: path to file containing XML source
        """
        super().__init__()
        # cast xml_file to str so, e.g., this will work with a Path
        self.xmltree = self.parse_xml(str(xml_file))
        self.xmlroot = self.xmltree.getroot()

    @staticmethod
    def parse_xml(filename):
        orig_write_out = xml2rfc.log.write_out
        orig_write_err = xml2rfc.log.write_err
        orig_xml_library = os.environ.get('XML_LIBRARY', None)
        tree = None
        with ExitStack() as stack:
            @stack.callback
            def cleanup():  # called when context exited, even if there's an exception
                xml2rfc.log.write_out = orig_write_out
                xml2rfc.log.write_err = orig_write_err
                os.environ.pop('XML_LIBRARY')
                if orig_xml_library is not None:
                    os.environ['XML_LIBRARY'] = orig_xml_library

            xml2rfc.log.write_out = open(os.devnull, 'w')
            xml2rfc.log.write_err = open(os.devnull, 'w')
            os.environ['XML_LIBRARY'] = settings.XML_LIBRARY

            parser = xml2rfc.XmlRfcParser(filename, quiet=True)
            tree = parser.parse()
            xml_version = tree.getroot().get('version', '2')
            if xml_version == '2':
                v2v3 = xml2rfc.V2v3XmlWriter(tree)
                tree.tree = v2v3.convert2to3()
        return tree

    def _document_name(self, anchor):
        """Guess document name from reference anchor

        Looks for series numbers and removes leading 0s from the number.
        """
        anchor = anchor.lower()  # always give back lowercase
        label = anchor.rstrip('0123456789')  # remove trailing digits
        if label in ['rfc', 'bcp', 'fyi', 'std']:
            number = int(anchor[len(label):])
            return f'{label}{number}'
        return anchor

    def _reference_section_type(self, section_name):
        """Determine reference type from name of references section"""
        section_name = section_name.lower()
        if 'normative' in section_name:
            return self.REF_TYPE_NORMATIVE
        elif 'informative' in section_name:
            return self.REF_TYPE_INFORMATIVE
        return self.REF_TYPE_UNKNOWN

    def get_refs(self):
        """Extract references from the draft"""
        refs = {}
        # accept nested <references> sections
        for section in self.xmlroot.findall('back//references'):
            ref_type = self._reference_section_type(section.findtext('name'))
            for ref in (section.findall('./reference') + section.findall('./referencegroup')):
                refs[self._document_name(ref.get('anchor'))] = ref_type
        return refs