Skip to content

Commit 0df0a87

Browse files
committed
Added a workaround for the current libmagic, which can quite easily mischaracterise text/plain documents as text/x-Algol68. Fixes issues ietf-tools#2941 and ietf-tools#2956.
- Legacy-Id: 17594
1 parent 1561120 commit 0df0a87

4 files changed

Lines changed: 15 additions & 20 deletions

File tree

ietf/meeting/forms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ def clean_file(self):
341341
mime_type, encoding = validate_mime_type(file, self.mime_types)
342342
if not hasattr(self, 'file_encoding'):
343343
self.file_encoding = {}
344-
self.file_encoding[file.name] = encoding.replace('charset=','') if encoding else None
344+
self.file_encoding[file.name] = encoding or None
345345
if self.mime_types:
346346
if not file.content_type in settings.MEETING_VALID_UPLOAD_MIME_FOR_OBSERVED_MIME[mime_type]:
347347
raise ValidationError('Upload Content-Type (%s) is different from the observed mime-type (%s)' % (file.content_type, mime_type))

ietf/meeting/views.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
from ietf.utils.pipe import pipe
8787
from ietf.utils.pdf import pdf_pages
8888
from ietf.utils.text import xslugify
89-
from ietf.utils.validators import get_mime_type
89+
from ietf.utils.mime import get_mime_type
9090

9191
from .forms import (InterimMeetingModelForm, InterimAnnounceForm, InterimSessionModelForm,
9292
InterimCancelForm, InterimSessionInlineFormSet, FileUploadForm, RequestMinutesForm,)
@@ -221,7 +221,7 @@ def materials_document(request, document, num=None, ext=None):
221221
bytes = file.read()
222222

223223
mtype, chset = get_mime_type(bytes)
224-
content_type = "%s; %s" % (mtype, chset)
224+
content_type = "%s; charset=%s" % (mtype, chset)
225225

226226
file_ext = os.path.splitext(filename)
227227
if len(file_ext) == 2 and file_ext[1] == '.md' and mtype == 'text/plain':

ietf/submit/parsers/base.py

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from __future__ import absolute_import, print_function, unicode_literals
66

77
import re
8-
import magic
98
import datetime
109
import debug # pyflakes:ignore
1110
import six
@@ -15,6 +14,8 @@
1514
from django.conf import settings
1615
from django.template.defaultfilters import filesizeformat
1716

17+
from ietf.utils.mime import get_mime_type
18+
1819
class MetaData(object):
1920
rev = None
2021
name = None
@@ -85,20 +86,7 @@ def parse_filename_extension(self):
8586
def parse_file_type(self):
8687
self.fd.file.seek(0)
8788
content = self.fd.file.read(64*1024)
88-
if hasattr(magic, "open"):
89-
m = magic.open(magic.MAGIC_MIME)
90-
m.load()
91-
filetype = m.buffer(content)
92-
else:
93-
m = magic.Magic()
94-
m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
95-
magic.magic_load(m.cookie, None)
96-
filetype = m.from_buffer(content)
97-
if ';' in filetype and 'charset=' in filetype:
98-
mimetype, charset = re.split('; *charset=', filetype)
99-
else:
100-
mimetype = re.split(';', filetype)[0]
101-
charset = 'utf-8'
89+
mimetype, charset = get_mime_type(content)
10290
if not mimetype in self.mimetypes:
10391
self.parsed_info.add_error('Expected an %s file of type "%s", found one of type "%s"' % (self.ext.upper(), '" or "'.join(self.mimetypes), mimetype))
10492
self.parsed_info.mimetype = mimetype

ietf/utils/mime.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from __future__ import absolute_import, print_function, unicode_literals
55

66
import magic
7+
import re
78

89
def get_mime_type(content):
910
# try to fixup encoding
@@ -16,6 +17,12 @@ def get_mime_type(content):
1617
m.cookie = magic.magic_open(magic.MAGIC_NONE | magic.MAGIC_MIME | magic.MAGIC_MIME_ENCODING)
1718
magic.magic_load(m.cookie, None)
1819
filetype = m.from_buffer(content)
19-
20-
return filetype.split('; ', 1)
20+
# Work around silliness in libmagic on OpenSUSE 15.1
21+
filetype = filetype.replace('text/x-Algol68;', 'text/plain;')
22+
if ';' in filetype and 'charset=' in filetype:
23+
mimetype, charset = re.split('; *charset=', filetype)
24+
else:
25+
mimetype = re.split(';', filetype)[0]
26+
charset = 'utf-8'
27+
return mimetype, charset
2128

0 commit comments

Comments
 (0)