Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 2 additions & 13 deletions ietf/doc/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
from ietf.person.utils import get_active_balloters
from ietf.utils import log
from ietf.utils.decorators import memoize
from ietf.utils.text import decode_document_content
from ietf.utils.validators import validate_no_control_chars
from ietf.utils.mail import formataddr
from ietf.utils.models import ForeignKey
Expand Down Expand Up @@ -640,19 +641,7 @@ def text(self, size = -1):
except IOError as e:
log.log(f"Error reading text for {path}: {e}")
return None
text = None
try:
text = raw.decode('utf-8')
except UnicodeDecodeError:
for back in range(1,4):
try:
text = raw[:-back].decode('utf-8')
break
except UnicodeDecodeError:
pass
if text is None:
text = raw.decode('latin-1')
return text
return decode_document_content(raw)

def text_or_error(self):
    """Return the document text, or an error message when there is none.

    Whatever ``self.text()`` returns is passed through when it is truthy;
    otherwise the result is an error string naming ``self.get_base_name()``.
    """
    content = self.text()
    if content:
        return content
    return "Error; cannot read '%s'" % self.get_base_name()
Expand Down
47 changes: 23 additions & 24 deletions ietf/doc/storage_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from django.core.files.storage import storages, Storage

from ietf.utils.log import log
from ietf.utils.text import decode_document_content


class StorageUtilsError(Exception):
Expand Down Expand Up @@ -164,32 +165,30 @@ def store_str(

def retrieve_bytes(kind: str, name: str) -> bytes:
    """Read the object `name` from the storage for `kind` and return its bytes.

    Returns ``b""`` without touching storage when
    ``settings.ENABLE_BLOBSTORAGE`` is false.

    Raises:
        Any exception raised while obtaining the storage, opening the object,
        or reading it; the failure is logged and then re-raised unchanged.
    """
    # Function-scope import, kept as in the original
    # (presumably to avoid a circular import -- TODO confirm).
    from ietf.doc.storage import maybe_log_timing

    if not settings.ENABLE_BLOBSTORAGE:
        return b""
    try:
        store = _get_storage(kind)
        with store.open(name) as f:
            # Only the read itself is wrapped by the timing context; timing is
            # enabled when the store carries a truthy ietf_log_blob_timing.
            with maybe_log_timing(
                hasattr(store, "ietf_log_blob_timing") and store.ietf_log_blob_timing,
                "read",
                bucket_name=store.bucket_name if hasattr(store, "bucket_name") else "",
                name=name,
            ):
                content = f.read()
    except Exception as err:
        log(f"Blobstore Error: Failed to read bytes from {kind}:{name}: {repr(err)}")
        raise
    return content


def retrieve_str(kind: str, name: str) -> str:
    """Read the object `name` from the storage for `kind` and return it as text.

    The bytes come from retrieve_bytes() and are decoded with
    decode_document_content(). Returns ``""`` when
    ``settings.ENABLE_BLOBSTORAGE`` is false.

    Raises:
        Any exception raised while retrieving or decoding; the failure is
        logged here and re-raised unchanged (retrieve_bytes() also logs its
        own read failures).
    """
    if not settings.ENABLE_BLOBSTORAGE:
        return ""
    try:
        content = decode_document_content(retrieve_bytes(kind, name))
    except Exception as err:
        log(f"Blobstore Error: Failed to read string from {kind}:{name}: {repr(err)}")
        raise
    return content
30 changes: 15 additions & 15 deletions ietf/group/tests_review.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,10 +888,10 @@ def test_requests_history_filter_page(self):
self.assertEqual(r.status_code, 200)
self.assertContains(r, review_req.doc.name)
self.assertContains(r, review_req2.doc.name)
self.assertContains(r, 'Assigned')
self.assertContains(r, 'Accepted')
self.assertContains(r, 'Completed')
self.assertContains(r, 'Ready')
self.assertContains(r, 'data-text="Assigned"')
self.assertContains(r, 'data-text="Accepted"')
self.assertContains(r, 'data-text="Completed"')
self.assertContains(r, 'data-text="Ready"')
self.assertContains(r, escape(assignment.reviewer.person.name))
self.assertContains(r, escape(assignment2.reviewer.person.name))

Expand All @@ -907,10 +907,10 @@ def test_requests_history_filter_page(self):
self.assertEqual(r.status_code, 200)
self.assertContains(r, review_req.doc.name)
self.assertNotContains(r, review_req2.doc.name)
self.assertContains(r, 'Assigned')
self.assertNotContains(r, 'Accepted')
self.assertNotContains(r, 'Completed')
self.assertNotContains(r, 'Ready')
self.assertContains(r, 'data-text="Assigned"')
self.assertNotContains(r, 'data-text="Accepted"')
self.assertNotContains(r, 'data-text="Completed"')
self.assertNotContains(r, 'data-text="Ready"')
self.assertContains(r, escape(assignment.reviewer.person.name))
self.assertNotContains(r, escape(assignment2.reviewer.person.name))

Expand All @@ -926,10 +926,10 @@ def test_requests_history_filter_page(self):
self.assertEqual(r.status_code, 200)
self.assertNotContains(r, review_req.doc.name)
self.assertContains(r, review_req2.doc.name)
self.assertNotContains(r, 'Assigned')
self.assertContains(r, 'Accepted')
self.assertContains(r, 'Completed')
self.assertContains(r, 'Ready')
self.assertNotContains(r, 'data-text="Assigned"')
self.assertContains(r, 'data-text="Accepted"')
self.assertContains(r, 'data-text="Completed"')
self.assertContains(r, 'data-text="Ready"')
self.assertNotContains(r, escape(assignment.reviewer.person.name))
self.assertContains(r, escape(assignment2.reviewer.person.name))

Expand All @@ -940,9 +940,9 @@ def test_requests_history_filter_page(self):
r = self.client.get(url)
self.assertEqual(r.status_code, 200)
self.assertNotContains(r, review_req.doc.name)
self.assertNotContains(r, 'Assigned')
self.assertNotContains(r, 'Accepted')
self.assertNotContains(r, 'Completed')
self.assertNotContains(r, 'data-text="Assigned"')
self.assertNotContains(r, 'data-text="Accepted"')
self.assertNotContains(r, 'data-text="Completed"')

def test_requests_history_invalid_filter_parameters(self):
# First assignment as assigned
Expand Down
2 changes: 1 addition & 1 deletion ietf/meeting/tests_session_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def test_edit(self):
self.assertRedirects(r, redirect_url)

# Check whether updates were stored in the database
sessions = Session.objects.filter(meeting=meeting, group=mars)
sessions = Session.objects.filter(meeting=meeting, group=mars).order_by("id")
self.assertEqual(len(sessions), 2)
session = sessions[0]
self.assertFalse(session.constraints().filter(name='time_relation'))
Expand Down
7 changes: 4 additions & 3 deletions ietf/meeting/tests_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from django.http import QueryDict, FileResponse
from django.template import Context, Template
from django.utils import timezone
from django.utils.html import escape
from django.utils.safestring import mark_safe
from django.utils.text import slugify

Expand Down Expand Up @@ -9491,7 +9492,7 @@ def test_session_attendance(self):
self.assertEqual(r.status_code, 200)
self.assertContains(r, '3 attendees')
for person in persons:
self.assertContains(r, person.plain_name())
self.assertContains(r, escape(person.plain_name()))

# Test for the "I was there" button.
def _test_button(person, expected):
Expand All @@ -9511,14 +9512,14 @@ def _test_button(person, expected):
# attempt to POST anyway is ignored
r = self.client.post(attendance_url)
self.assertEqual(r.status_code, 200)
self.assertNotContains(r, persons[3].plain_name())
self.assertNotContains(r, escape(persons[3].plain_name()))
self.assertEqual(session.attended_set.count(), 3)
# button is shown, and POST is accepted
meeting.importantdate_set.update(name_id='revsub',date=date_today() + datetime.timedelta(days=20))
_test_button(persons[3], True)
r = self.client.post(attendance_url)
self.assertEqual(r.status_code, 200)
self.assertContains(r, persons[3].plain_name())
self.assertContains(r, escape(persons[3].plain_name()))
self.assertEqual(session.attended_set.count(), 4)

# When the meeting is finalized, a bluesheet file is generated,
Expand Down
19 changes: 0 additions & 19 deletions ietf/utils/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@
set_url_coverage,
)
from ietf.utils.test_utils import TestCase, unicontent
from ietf.utils.text import parse_unicode
from ietf.utils.timezone import timezone_not_near_midnight
from ietf.utils.xmldraft import XMLDraft, InvalidMetadataError, capture_xml2rfc_output

Expand Down Expand Up @@ -864,24 +863,6 @@ def test_assertion(self):
assertion('False')
settings.SERVER_MODE = 'test'

class TestRFC2047Strings(TestCase):
    """Checks parse_unicode() against RFC 2047 style encoded-word strings."""

    def test_parse_unicode(self):
        """Every encoded input must decode to its expected string."""
        # (encoded input, expected output); the last two entries show that
        # plain and empty strings are expected to come back unchanged.
        cases = (
            ("=?utf-8?b?4Yuz4YuK4Ym1IOGJoOGJgOGIiA==?=", "ዳዊት በቀለ"),
            ("=?utf-8?b?5Li9IOmDnA==?=", "丽 郜"),
            ("=?utf-8?b?4KSV4KSu4KWN4KSs4KWL4KScIOCkoeCkvuCksA==?=", "कम्बोज डार"),
            ("=?utf-8?b?zpfPgc6szrrOu861zrnOsSDOm865z4zOvc+Ezrc=?=", "Ηράκλεια Λιόντη"),
            ("=?utf-8?b?15nXqdeo15DXnCDXqNeV15bXoNek15zXkw==?=", "ישראל רוזנפלד"),
            ("=?utf-8?b?5Li95Y2OIOeahw==?=", "丽华 皇"),
            ("=?utf-8?b?77ul77qu766V77qzIO+tlu+7ru+vvu+6ju+7pw==?=", "ﻥﺮﮕﺳ ﭖﻮﯾﺎﻧ"),
            ("=?utf-8?b?77uh77uu77qz77uu76++IO+6su+7tO+7p++6jSDvurDvu6Pvuo7vu6jvr74=?=", "ﻡﻮﺳﻮﯾ ﺲﻴﻧﺍ ﺰﻣﺎﻨﯾ"),
            ("=?utf-8?b?ScOxaWdvIFNhbsOnIEliw6HDsWV6IGRlIGxhIFBlw7Fh?=", "Iñigo Sanç Ibáñez de la Peña"),
            ("Mart van Oostendorp", "Mart van Oostendorp"),
            ("", ""),
        )
        for encoded, expected in cases:
            self.assertEqual(expected, parse_unicode(encoded))

class TestAndroidSiteManifest(TestCase):
def test_manifest(self):
r = self.client.get(urlreverse('site.webmanifest'))
Expand Down
71 changes: 71 additions & 0 deletions ietf/utils/tests_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright The IETF Trust 2021-2026, All Rights Reserved
from ietf.utils.test_utils import TestCase
from ietf.utils.text import parse_unicode, decode_document_content


class TestDecoders(TestCase):
    """Tests for the decoding helpers imported from ietf.utils.text."""

    def test_parse_unicode(self):
        """Every encoded input must decode to its expected string."""
        # (encoded input, expected output); the last two entries show that
        # plain and empty strings are expected to come back unchanged.
        cases = (
            ("=?utf-8?b?4Yuz4YuK4Ym1IOGJoOGJgOGIiA==?=", "ዳዊት በቀለ"),
            ("=?utf-8?b?5Li9IOmDnA==?=", "丽 郜"),
            ("=?utf-8?b?4KSV4KSu4KWN4KSs4KWL4KScIOCkoeCkvuCksA==?=", "कम्बोज डार"),
            ("=?utf-8?b?zpfPgc6szrrOu861zrnOsSDOm865z4zOvc+Ezrc=?=", "Ηράκλεια Λιόντη"),
            ("=?utf-8?b?15nXqdeo15DXnCDXqNeV15bXoNek15zXkw==?=", "ישראל רוזנפלד"),
            ("=?utf-8?b?5Li95Y2OIOeahw==?=", "丽华 皇"),
            ("=?utf-8?b?77ul77qu766V77qzIO+tlu+7ru+vvu+6ju+7pw==?=", "ﻥﺮﮕﺳ ﭖﻮﯾﺎﻧ"),
            (
                "=?utf-8?b?77uh77uu77qz77uu76++IO+6su+7tO+7p++6jSDvurDvu6Pvuo7vu6jvr74=?=",
                "ﻡﻮﺳﻮﯾ ﺲﻴﻧﺍ ﺰﻣﺎﻨﯾ",
            ),
            (
                "=?utf-8?b?ScOxaWdvIFNhbsOnIEliw6HDsWV6IGRlIGxhIFBlw7Fh?=",
                "Iñigo Sanç Ibáñez de la Peña",
            ),
            ("Mart van Oostendorp", "Mart van Oostendorp"),
            ("", ""),
        )
        for encoded, expected in cases:
            self.assertEqual(expected, parse_unicode(encoded))

    def test_decode_document_content(self):
        """Cover utf-8, truncated utf-8, latin-1 and other 8-bit input."""
        utf8_bytes = "𒀭𒊩𒌆𒄈𒋢".encode("utf-8")  # ends with 4-byte character
        latin1_bytes = "àéîøü".encode("latin-1")
        other_bytes = "àéîøü".encode("macintosh")  # different from its latin-1 encoding
        assert other_bytes.decode("macintosh") != other_bytes.decode("latin-1"),\
            "test broken: other_bytes must decode differently as latin-1"

        utf8_text = utf8_bytes.decode()

        # simplest case: complete utf-8 round-trips
        self.assertEqual(decode_document_content(utf8_bytes), utf8_text)

        # Dropping 1-3 bytes leaves the final 4-byte character incomplete and
        # dropping 4 removes it entirely; in every case the decoder should
        # return everything except that last character.
        for lost in range(1, 5):
            self.assertEqual(
                decode_document_content(utf8_bytes[:-lost]),
                utf8_text[:-1],
            )

        # latin-1 is also simple
        self.assertEqual(
            decode_document_content(latin1_bytes),
            latin1_bytes.decode("latin-1"),
        )

        # other character sets are just treated as latin1 (bug? feature? you decide)
        self.assertEqual(
            decode_document_content(other_bytes),
            other_bytes.decode("latin-1"),
        )
18 changes: 18 additions & 0 deletions ietf/utils/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,3 +263,21 @@ def parse_unicode(text):
else:
text = decoded_string
return text


def decode_document_content(content: bytes) -> str:
    """Decode document contents as utf-8 or latin1

    Method was developed in DocumentInfo.text() where it gave acceptable results
    for existing documents / RFCs.

    Decoding rules:

    * Valid utf-8 is decoded as utf-8.
    * utf-8 whose final multi-byte character is cut short (1-3 bytes of an
      unfinished sequence at the very end) is decoded with that partial
      character dropped.
    * Anything else is decoded as latin-1, which cannot fail.

    Only a genuinely incomplete trailing sequence is dropped. Input such as
    ``b"Jos\\xe9\\n"`` is not truncated utf-8, so it is decoded in full as
    latin-1 rather than losing its last bytes. Input that *ends* on a byte
    that could start a utf-8 sequence (e.g. ``b"caf\\xe9"``) is
    indistinguishable from truncated utf-8 and is still trimmed.

    Args:
        content: raw bytes of the document.

    Returns:
        The decoded text.
    """
    import codecs  # function-scope import: only needed here

    try:
        # With final=False the incremental decoder holds back an unfinished
        # trailing sequence instead of raising, but still raises
        # UnicodeDecodeError for bytes that can never be valid utf-8.
        return codecs.getincrementaldecoder("utf-8")().decode(content, final=False)
    except UnicodeDecodeError:
        return content.decode("latin-1")  # everything is legal in latin-1
Loading