Skip to content

Commit b04254a

Browse files
Treat application/octet-stream as text/markdown for '.md' materials uploads. Refactor FileUploadForm hierarchy to reduce boilerplate. Fixes ietf-tools#3163. Commit ready for merge.
- Legacy-Id: 19744
1 parent 50bdddb commit b04254a

5 files changed

Lines changed: 197 additions & 56 deletions

File tree

ietf/meeting/forms.py

Lines changed: 82 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
import os
77
import datetime
88
import json
9+
import re
10+
11+
from pathlib import Path
912

1013
from django import forms
1114
from django.conf import settings
@@ -326,14 +329,19 @@ def __init__(self, *args, **kwargs):
326329
self.fields['date'].widget.attrs['disabled'] = True
327330

328331
class FileUploadForm(forms.Form):
332+
"""Base class for FileUploadForms
333+
334+
Abstract base class - subclasses must fill in the doc_type value with
335+
the type of document they handle.
336+
"""
329337
file = forms.FileField(label='File to upload')
330338

339+
doc_type = '' # subclasses must set this
340+
331341
def __init__(self, *args, **kwargs):
332-
doc_type = kwargs.pop('doc_type')
333-
assert doc_type in settings.MEETING_VALID_UPLOAD_EXTENSIONS
334-
self.doc_type = doc_type
335-
self.extensions = settings.MEETING_VALID_UPLOAD_EXTENSIONS[doc_type]
336-
self.mime_types = settings.MEETING_VALID_UPLOAD_MIME_TYPES[doc_type]
342+
assert self.doc_type in settings.MEETING_VALID_UPLOAD_EXTENSIONS
343+
self.extensions = settings.MEETING_VALID_UPLOAD_EXTENSIONS[self.doc_type]
344+
self.mime_types = settings.MEETING_VALID_UPLOAD_MIME_TYPES[self.doc_type]
337345
super(FileUploadForm, self).__init__(*args, **kwargs)
338346
label = '%s file to upload. ' % (self.doc_type.capitalize(), )
339347
if self.doc_type == "slides":
@@ -346,22 +354,88 @@ def clean_file(self):
346354
file = self.cleaned_data['file']
347355
validate_file_size(file)
348356
ext = validate_file_extension(file, self.extensions)
357+
358+
# override the Content-Type if needed
359+
if file.content_type in 'application/octet-stream':
360+
content_type_map = settings.MEETING_APPLICATION_OCTET_STREAM_OVERRIDES
361+
filename = Path(file.name)
362+
if filename.suffix in content_type_map:
363+
file.content_type = content_type_map[filename.suffix]
364+
self.cleaned_data['file'] = file
365+
349366
mime_type, encoding = validate_mime_type(file, self.mime_types)
350367
if not hasattr(self, 'file_encoding'):
351368
self.file_encoding = {}
352369
self.file_encoding[file.name] = encoding or None
353370
if self.mime_types:
354371
if not file.content_type in settings.MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME[mime_type]:
355372
raise ValidationError('Upload Content-Type (%s) is different from the observed mime-type (%s)' % (file.content_type, mime_type))
356-
if mime_type in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS:
357-
if not ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS[mime_type]:
373+
# We just validated that file.content_type is safe to accept despite being identified
374+
# as a different MIME type by the validator. Check extension based on file.content_type
375+
# because that better reflects the intention of the upload client.
376+
if file.content_type in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS:
377+
if not ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS[file.content_type]:
358378
raise ValidationError('Upload Content-Type (%s) does not match the extension (%s)' % (file.content_type, ext))
359-
if mime_type in ['text/html', ] or ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS['text/html']:
379+
if (file.content_type in ['text/html', ]
380+
or ext in settings.MEETING_VALID_MIME_TYPE_EXTENSIONS.get('text/html', [])):
360381
# We'll do html sanitization later, but for frames, we fail here,
361382
# as the sanitized version will most likely be useless.
362383
validate_no_html_frame(file)
363384
return file
364385

386+
387+
class UploadBlueSheetForm(FileUploadForm):
388+
doc_type = 'bluesheets'
389+
390+
391+
class ApplyToAllFileUploadForm(FileUploadForm):
392+
"""FileUploadField that adds an apply_to_all checkbox
393+
394+
Checkbox can be disabled by passing show_apply_to_all_checkbox=False to the constructor.
395+
This entirely removes the field from the form.
396+
"""
397+
# Note: subclasses must set doc_type for FileUploadForm
398+
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
399+
400+
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
401+
super().__init__(*args, **kwargs)
402+
if not show_apply_to_all_checkbox:
403+
self.fields.pop('apply_to_all')
404+
else:
405+
self.order_fields(
406+
sorted(
407+
self.fields.keys(),
408+
key=lambda f: 'zzzzzz' if f == 'apply_to_all' else f
409+
)
410+
)
411+
412+
class UploadMinutesForm(ApplyToAllFileUploadForm):
413+
doc_type = 'minutes'
414+
415+
416+
class UploadAgendaForm(ApplyToAllFileUploadForm):
417+
doc_type = 'agenda'
418+
419+
420+
class UploadSlidesForm(ApplyToAllFileUploadForm):
421+
doc_type = 'slides'
422+
title = forms.CharField(max_length=255)
423+
424+
def __init__(self, session, *args, **kwargs):
425+
super().__init__(*args, **kwargs)
426+
self.session = session
427+
428+
def clean_title(self):
429+
title = self.cleaned_data['title']
430+
# The current tables only handles Unicode BMP:
431+
if ord(max(title)) > 0xffff:
432+
raise forms.ValidationError("The title contains characters outside the Unicode BMP, which is not currently supported")
433+
if self.session.meeting.type_id=='interim':
434+
if re.search(r'-\d{2}$', title):
435+
raise forms.ValidationError("Interim slides currently may not have a title that ends with something that looks like a revision number (-nn)")
436+
return title
437+
438+
365439
class RequestMinutesForm(forms.Form):
366440
to = MultiEmailField()
367441
cc = MultiEmailField(required=False)

ietf/meeting/tests_forms.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# Copyright The IETF Trust 2021, All Rights Reserved
2+
# -*- coding: utf-8 -*-
3+
"""Tests of forms in the Meeting application"""
4+
from django.core.files.uploadedfile import SimpleUploadedFile
5+
from django.test import override_settings
6+
7+
from ietf.meeting.forms import FileUploadForm, ApplyToAllFileUploadForm
8+
from ietf.utils.test_utils import TestCase
9+
10+
11+
@override_settings(
12+
MEETING_APPLICATION_OCTET_STREAM_OVERRIDES={'.md': 'text/markdown'}, # test relies on .txt not mapping
13+
MEETING_VALID_UPLOAD_EXTENSIONS={'minutes': ['.txt', '.md']}, # test relies on .exe being absent
14+
MEETING_VALID_UPLOAD_MIME_TYPES={'minutes': ['text/plain', 'text/markdown']},
15+
MEETING_VALID_MIME_TYPE_EXTENSIONS={'text/plain': ['.txt'], 'text/markdown': ['.md']},
16+
MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME={'text/plain': ['text/plain', 'text/markdown']},
17+
)
18+
class FileUploadFormTests(TestCase):
19+
class TestClass(FileUploadForm):
20+
doc_type = 'minutes'
21+
22+
def test_accepts_valid_data(self):
23+
test_file = SimpleUploadedFile(
24+
name='file.txt',
25+
content=b'plain text',
26+
content_type='text/plain',
27+
)
28+
29+
form = FileUploadFormTests.TestClass(files={'file': test_file})
30+
self.assertTrue(form.is_valid(), 'Test data are valid input')
31+
cleaned_file = form.cleaned_data['file']
32+
self.assertEqual(cleaned_file.name, 'file.txt', 'Uploaded filename should not be changed')
33+
with cleaned_file.open('rb') as f:
34+
self.assertEqual(f.read(), b'plain text', 'Uploaded file contents should not be changed')
35+
self.assertEqual(cleaned_file.content_type, 'text/plain', 'Content-Type should be overridden')
36+
37+
def test_overrides_content_type_application_octet_stream(self):
38+
test_file = SimpleUploadedFile(
39+
name='file.md',
40+
content=b'plain text',
41+
content_type='application/octet-stream',
42+
)
43+
44+
form = FileUploadFormTests.TestClass(files={'file': test_file})
45+
self.assertTrue(form.is_valid(), 'Test data are valid input')
46+
cleaned_file = form.cleaned_data['file']
47+
# Test that the test_file is what actually came out of the cleaning process.
48+
# This is not technically required here, but the other tests check that test_file's
49+
# content_type has not been changed. If cleaning does not modify the content_type
50+
# when it succeeds, then those other tests are not actually testing anything.
51+
self.assertEqual(cleaned_file, test_file, 'Cleaning should return the file object that was passed in')
52+
self.assertEqual(cleaned_file.name, 'file.md', 'Uploaded filename should not be changed')
53+
with cleaned_file.open('rb') as f:
54+
self.assertEqual(f.read(), b'plain text', 'Uploaded file contents should not be changed')
55+
self.assertEqual(cleaned_file.content_type, 'text/markdown', 'Content-Type should be overridden')
56+
57+
def test_overrides_only_application_octet_stream(self):
58+
test_file = SimpleUploadedFile(
59+
name='file.md',
60+
content=b'plain text',
61+
content_type='application/json'
62+
)
63+
64+
form = FileUploadFormTests.TestClass(files={'file': test_file})
65+
self.assertFalse(form.is_valid(), 'Test data are invalid input')
66+
self.assertEqual(test_file.name, 'file.md', 'Uploaded filename should not be changed')
67+
self.assertEqual(test_file.content_type, 'application/json', 'Uploaded Content-Type should not be changed')
68+
69+
def test_overrides_only_requested_extensions_when_valid_ext(self):
70+
test_file = SimpleUploadedFile(
71+
name='file.txt',
72+
content=b'plain text',
73+
content_type='application/octet-stream',
74+
)
75+
76+
form = FileUploadFormTests.TestClass(files={'file': test_file})
77+
self.assertFalse(form.is_valid(), 'Test data are invalid input')
78+
self.assertEqual(test_file.name, 'file.txt', 'Uploaded filename should not be changed')
79+
self.assertEqual(test_file.content_type, 'application/octet-stream', 'Uploaded Content-Type should not be changed')
80+
81+
def test_overrides_only_requested_extensions_when_invalid_ext(self):
82+
test_file = SimpleUploadedFile(
83+
name='file.exe',
84+
content=b'plain text',
85+
content_type='application/octet-stream'
86+
)
87+
88+
form = FileUploadFormTests.TestClass(files={'file': test_file})
89+
self.assertFalse(form.is_valid(), 'Test data are invalid input')
90+
self.assertEqual(test_file.name, 'file.exe', 'Uploaded filename should not be changed')
91+
self.assertEqual(test_file.content_type, 'application/octet-stream', 'Uploaded Content-Type should not be changed')
92+
93+
94+
class ApplyToAllFileUploadFormTests(TestCase):
95+
class TestClass(ApplyToAllFileUploadForm):
96+
doc_type = 'minutes'
97+
98+
def test_has_apply_to_all_field_by_default(self):
99+
form = ApplyToAllFileUploadFormTests.TestClass(show_apply_to_all_checkbox=True)
100+
self.assertIn('apply_to_all', form.fields)
101+
102+
def test_no_show_apply_to_all_field(self):
103+
form = ApplyToAllFileUploadFormTests.TestClass(show_apply_to_all_checkbox=False)
104+
self.assertNotIn('apply_to_all', form.fields)

ietf/meeting/views.py

Lines changed: 2 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@
9999
from ietf.utils.text import xslugify
100100

101101
from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm,
102-
InterimCancelForm, InterimSessionInlineFormSet, FileUploadForm, RequestMinutesForm,)
102+
InterimCancelForm, InterimSessionInlineFormSet, RequestMinutesForm,
103+
UploadAgendaForm, UploadBlueSheetForm, UploadMinutesForm, UploadSlidesForm)
103104

104105

105106
def get_interim_menu_entries(request):
@@ -2341,13 +2342,6 @@ def add_session_drafts(request, session_id, num):
23412342
})
23422343

23432344

2344-
class UploadBlueSheetForm(FileUploadForm):
2345-
2346-
def __init__(self, *args, **kwargs):
2347-
kwargs['doc_type'] = 'bluesheets'
2348-
super(UploadBlueSheetForm, self).__init__(*args, **kwargs )
2349-
2350-
23512345
def upload_session_bluesheets(request, session_id, num):
23522346
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
23532347
session = get_object_or_404(Session,pk=session_id)
@@ -2435,15 +2429,6 @@ def save_bluesheet(request, session, file, encoding='utf-8'):
24352429
doc.save_with_history([e])
24362430
return save_error
24372431

2438-
class UploadMinutesForm(FileUploadForm):
2439-
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
2440-
2441-
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
2442-
kwargs['doc_type'] = 'minutes'
2443-
super(UploadMinutesForm, self).__init__(*args, **kwargs )
2444-
if not show_apply_to_all_checkbox:
2445-
self.fields.pop('apply_to_all')
2446-
24472432

24482433
def upload_session_minutes(request, session_id, num):
24492434
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
@@ -2536,15 +2521,6 @@ def upload_session_minutes(request, session_id, num):
25362521
})
25372522

25382523

2539-
class UploadAgendaForm(FileUploadForm):
2540-
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=True,required=False)
2541-
2542-
def __init__(self, show_apply_to_all_checkbox, *args, **kwargs):
2543-
kwargs['doc_type'] = 'agenda'
2544-
super(UploadAgendaForm, self).__init__(*args, **kwargs )
2545-
if not show_apply_to_all_checkbox:
2546-
self.fields.pop('apply_to_all')
2547-
25482524
def upload_session_agenda(request, session_id, num):
25492525
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
25502526
session = get_object_or_404(Session,pk=session_id)
@@ -2639,27 +2615,6 @@ def upload_session_agenda(request, session_id, num):
26392615
})
26402616

26412617

2642-
class UploadSlidesForm(FileUploadForm):
2643-
title = forms.CharField(max_length=255)
2644-
apply_to_all = forms.BooleanField(label='Apply to all group sessions at this meeting',initial=False,required=False)
2645-
2646-
def __init__(self, session, show_apply_to_all_checkbox, *args, **kwargs):
2647-
self.session = session
2648-
kwargs['doc_type'] = 'slides'
2649-
super(UploadSlidesForm, self).__init__(*args, **kwargs )
2650-
if not show_apply_to_all_checkbox:
2651-
self.fields.pop('apply_to_all')
2652-
2653-
def clean_title(self):
2654-
title = self.cleaned_data['title']
2655-
# The current tables only handles Unicode BMP:
2656-
if ord(max(title)) > 0xffff:
2657-
raise forms.ValidationError("The title contains characters outside the Unicode BMP, which is not currently supported")
2658-
if self.session.meeting.type_id=='interim':
2659-
if re.search(r'-\d{2}$', title):
2660-
raise forms.ValidationError("Interim slides currently may not have a title that ends with something that looks like a revision number (-nn)")
2661-
return title
2662-
26632618
def upload_session_slides(request, session_id, num, name):
26642619
# num is redundant, but we're dragging it along an artifact of where we are in the current URL structure
26652620
session = get_object_or_404(Session,pk=session_id)

ietf/meeting/views_proceedings.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
from ietf.utils.text import xslugify
1919

2020
class UploadProceedingsMaterialForm(FileUploadForm):
21+
doc_type = 'procmaterials'
22+
2123
use_url = forms.BooleanField(
2224
required=False,
2325
label='Use an external URL instead of uploading a document',
@@ -34,7 +36,7 @@ class Media:
3436
)
3537

3638
def __init__(self, *args, **kwargs):
37-
super().__init__(doc_type='procmaterials', *args, **kwargs)
39+
super().__init__(*args, **kwargs)
3840
self.fields['file'].label = 'Select a file to upload. Allowed format{}: {}'.format(
3941
'' if len(self.mime_types) == 1 else 's',
4042
', '.join(self.mime_types),

ietf/settings.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,12 @@ def skip_unreadable_post(record):
949949
'application/pdf': ['.pdf'],
950950
}
951951

952+
# Files uploaded with Content-Type application/octet-stream and an extension in this map will
953+
# be treated as if they had been uploaded with the mapped Content-Type value.
954+
MEETING_APPLICATION_OCTET_STREAM_OVERRIDES = {
955+
'.md': 'text/markdown',
956+
}
957+
952958
MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME = {
953959
'text/plain': ['text/plain', 'text/markdown', 'text/x-markdown', ],
954960
'text/html': ['text/html', ],

0 commit comments

Comments
 (0)