Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions ietf/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,27 +177,33 @@ def dehydrate(self, bundle, for_list=True):
return dehydrated



# Matches the C0 control characters U+0000-U+0008, U+000B, U+000C and
# U+000E-U+001F, i.e. every code point below U+0020 except tab (#x9), LF (#xA)
# and CR (#xD), which XML 1.0 allows.
# Serializer.to_simple() replaces each match with its Unicode control picture
# (U+2400 + code point). The mapping is one-to-one, but it is only reversible
# if the input does not already contain characters in U+2400-U+241F.
# NOTE(review): XML 1.0 also excludes surrogates and U+FFFE/U+FFFF; this
# pattern does not cover them -- confirm whether such values can reach lxml.
_XML_INVALID_CTRL_RE = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")


class Serializer(tastypie.serializers.Serializer):
    """Tastypie serializer with UTC timestamps and XML-safe string output.

    Two opt-in options control how to_simple() post-processes string values;
    to_etree() enables both so that strings handed to lxml never contain
    control characters that XML 1.0 forbids.
    """

    # Option keys read by to_simple(). Both default to "off" when absent.
    OPTION_ESCAPE_NULLS = "datatracker-escape-nulls"
    OPTION_ESCAPE_XML_INVALID = "datatracker-escape-xml-invalid"

    def format_datetime(self, data):
        """Return *data* as a UTC timestamp string, ``YYYY-MM-DDTHH:MM:SSZ``.

        The value is converted to UTC, its tzinfo is dropped so isoformat()
        emits no numeric offset, and a literal "Z" is appended instead.
        """
        return data.astimezone(datetime.UTC).replace(tzinfo=None).isoformat(timespec="seconds") + "Z"

    def to_simple(self, data, options):
        """Convert *data* via the parent class, then escape strings on request.

        :param data: value to simplify; passed straight to the parent class.
        :param options: mapping of serializer options, or None/empty for none.
        :returns: the parent's result; when that result is a ``str`` and the
            matching option is truthy, with U+0000 and/or all XML-invalid
            control characters replaced by their Unicode control pictures.
        """
        options = options or {}
        simple_data = super().to_simple(data, options)
        if not isinstance(simple_data, str):
            # Only strings can carry the offending characters.
            return simple_data
        if options.get(self.OPTION_ESCAPE_NULLS, False):
            # replace nulls with unicode "symbol for null character", \u2400
            simple_data = simple_data.replace("\x00", "\u2400")
        if options.get(self.OPTION_ESCAPE_XML_INVALID, False):
            # Replace control chars invalid in XML 1.0 with their Unicode
            # control pictures (U+2400-U+241F) so lxml won't reject the string.
            simple_data = _XML_INVALID_CTRL_RE.sub(
                lambda m: chr(ord(m.group()) + 0x2400), simple_data
            )
        return simple_data

    def to_etree(self, data, options=None, name=None, depth=0):
        """Build the etree for *data* with control-character escaping enabled.

        lxml does not escape nulls on its own and rejects control characters
        that are invalid in XML 1.0, so to_simple() is asked to escape them
        before they reach lxml. This is mostly (only?) an issue when
        generating error responses for fuzzers.

        The caller's *options* mapping is never modified: the flags are set
        on a copy.

        :param data: value to serialize.
        :param options: optional mapping of serializer options.
        :param name: passed through to the parent implementation.
        :param depth: passed through to the parent implementation.
        :returns: whatever the parent ``to_etree()`` returns.
        """
        flags = (self.OPTION_ESCAPE_NULLS, self.OPTION_ESCAPE_XML_INVALID)
        # Copy only when a flag is missing or falsy; a dict that already
        # carries both flags (e.g. one produced by an earlier call) is reused.
        if not (options and all(options.get(flag) for flag in flags)):
            options = {**(options or {}), **dict.fromkeys(flags, True)}
        return super().to_etree(data, options, name, depth)
27 changes: 17 additions & 10 deletions ietf/api/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1542,20 +1542,27 @@ def test_all_model_resources_exist(self):
self.assertIn(model._meta.model_name, list(app_resources.keys()),
"There doesn't seem to be any API resource for model %s.models.%s"%(app.__name__,model.__name__,))

def test_serializer_to_etree_handles_nulls(self):
"""Serializer.to_etree() should accept a string containing a null character (U+0000)."""
# NOTE(review): no statements follow this docstring before the next `def`;
# this looks like a header left over from the diff view -- confirm against
# the real file.
def test_serializer_to_etree_handles_xml_invalid_control_chars(self):
    """Serializer to_etree() must not raise ValueError for any XML-invalid control character."""
    serializer = Serializer()
    # Ordinary strings and strings with valid whitespace must be accepted.
    # (Only the absence of ValueError is checked, not the produced output.)
    try:
        serializer.to_etree("string with no nulls in it")
        serializer.to_etree("string with no special chars")
        serializer.to_etree("tab\there lf\nhere cr\rhere")
    except ValueError:
        self.fail("serializer.to_etree raised ValueError on an ordinary string")
    try:
        serializer.to_etree("string with a \x00 in it")
    except ValueError:
        self.fail(
            "serializer.to_etree raised ValueError on a string "
            "containing a null character"
        )
    # Every control character that XML 1.0 forbids must be escaped rather than
    # causing a ValueError. This is the class of characters that triggered the
    # production exception (lxml.etree._utf8 rejects them all).
    invalid_chars = [
        chr(c) for c in (*range(0x00, 0x09), 0x0B, 0x0C, *range(0x0E, 0x20))
    ]
    for ch in invalid_chars:
        # One subTest per character so a single run reports every failure.
        with self.subTest(char=f"U+{ord(ch):04X}"):
            try:
                # Interpolate the raw character. Using {ch!r} here would embed
                # the printable repr (e.g. the text '\x01'), so the serializer
                # would never see a control character at all.
                serializer.to_etree(f"string with {ch} in it")
            except ValueError:
                self.fail(
                    f"serializer.to_etree raised ValueError on a string "
                    f"containing control character U+{ord(ch):04X}"
                )


class RfcdiffSupportTests(TestCase):
Expand Down
Loading