Skip to content

Commit fd53f98

Browse files
author
Sasha Romijn
committed
Fix ietf-tools#2584 - Add additional content validation for uploaded texts.
Permitted MIME types are now text/plain, text/markdown and text/x-rst. This applies to all usages of get_cleaned_text_file_content(), including reviews, but also other similar places where text can be either written into a textarea or uploaded. Commit ready for merge. - Legacy-Id: 16922
1 parent 9df5839 commit fd53f98

3 files changed

Lines changed: 54 additions & 2 deletions

File tree

ietf/settings.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,10 @@ def skip_unreadable_post(record):
672672
"bluesheets": "https://www.ietf.org/proceedings/{meeting.number}/bluesheets/{doc.uploaded_filename}",
673673
}
674674

675+
# Valid MIME types for cases where text is uploaded and immediately extracted,
676+
# e.g. a charter or a review. Must be a tuple, not a list, because it is passed directly to str.startswith().
677+
DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES = ('text/plain', 'text/markdown', 'text/x-rst')
678+
675679
# Override this in settings_local.py if needed
676680
CACHE_MIDDLEWARE_SECONDS = 300
677681
CACHE_MIDDLEWARE_KEY_PREFIX = ''
@@ -859,6 +863,13 @@ def skip_unreadable_post(record):
859863
TZDATA_ICS_PATH = BASE_DIR + '/../vzic/zoneinfo/'
860864
CHANGELOG_PATH = BASE_DIR + '/../changelog'
861865

866+
MEETING_VALID_UPLOAD_MIME_TYPES = {
867+
'agenda': ['text/plain', 'text/html', 'text/markdown', ],
868+
'minutes': ['text/plain', 'text/html', 'application/pdf', 'text/markdown', ],
869+
'slides': [],
870+
'bluesheets': ['application/pdf', 'text/plain', ],
871+
}
872+
862873
SECR_BLUE_SHEET_PATH = '/a/www/ietf-datatracker/documents/blue_sheet.rtf'
863874
SECR_BLUE_SHEET_URL = '//datatracker.ietf.org/documents/blue_sheet.rtf'
864875
SECR_INTERIM_LISTING_DIR = '/a/www/www6/meeting/interim'

ietf/utils/test_textupload.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright The IETF Trust 2015-2019, All Rights Reserved
2+
# -*- coding: utf-8 -*-
3+
4+
5+
from __future__ import absolute_import, print_function, unicode_literals
6+
7+
from django.core.exceptions import ValidationError
8+
from django.core.files.uploadedfile import SimpleUploadedFile
9+
10+
from .textupload import get_cleaned_text_file_content
11+
from ietf.utils.test_utils import TestCase
12+
13+
14+
class GetCleanedTextFileContentTest(TestCase):
15+
def test_no_file(self):
16+
self.assertEqual(get_cleaned_text_file_content(None), "")
17+
18+
def test_valid_file(self):
19+
data = 'testing 👾'
20+
uploaded_file = SimpleUploadedFile('data.txt', data.encode('utf-8'))
21+
self.assertEqual(get_cleaned_text_file_content(uploaded_file), data)
22+
23+
def test_invalid_mime_type_gif(self):
24+
data = 'GIF89a;'
25+
uploaded_file = SimpleUploadedFile('data.txt', data.encode('utf-8'))
26+
with self.assertRaises(ValidationError) as context:
27+
get_cleaned_text_file_content(uploaded_file)
28+
self.assertIn('does not appear to be a text file', context.exception.message)
29+
self.assertIn('image/gif', context.exception.message)
30+
31+
def test_invalid_mime_type_rst(self):
32+
data = r'{\rtf1}'
33+
uploaded_file = SimpleUploadedFile('data.txt', data.encode('utf-8'))
34+
with self.assertRaises(ValidationError) as context:
35+
get_cleaned_text_file_content(uploaded_file)
36+
self.assertIn('does not appear to be a text file', context.exception.message)
37+
self.assertIn('text/rtf', context.exception.message)

ietf/utils/textupload.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from django.core.exceptions import ValidationError
1010

1111
import debug # pyflakes:ignore
12+
from ietf.settings import DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES
13+
1214

1315
def get_cleaned_text_file_content(uploaded_file):
1416
"""Read uploaded file, try to fix up encoding to UTF-8 and
@@ -36,8 +38,10 @@ def get_cleaned_text_file_content(uploaded_file):
3638
magic.magic_load(m.cookie, None)
3739
filetype = m.from_buffer(content)
3840

39-
if not filetype.startswith("text"):
40-
raise ValidationError("Uploaded file does not appear to be a text file.")
41+
if not filetype.startswith(DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES):
42+
raise ValidationError("Uploaded file does not appear to be a text file. "
43+
"Permitted MIME types are {}, this file is {}"
44+
.format(', '.join(DOC_TEXT_FILE_VALID_UPLOAD_MIME_TYPES), filetype))
4145

4246
match = re.search(r"charset=([\w-]+)", filetype)
4347
if not match:

0 commit comments

Comments
 (0)