Skip to content

Commit 967ece7

Browse files
committed
Started refactoring how text is read from document files (drafts, charters, etc.), in order to normalise on one way of doing this and to make that one way return unicode rather than undecoded bytes. This is the first of two steps; its purpose is to gauge the possible issues and report on discrepancies.
- Legacy-Id: 14406
1 parent f2f21c4 commit 967ece7

11 files changed

Lines changed: 135 additions & 66 deletions

File tree

ietf/doc/mails.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@
88
from django.conf import settings
99
from django.urls import reverse as urlreverse
1010

11+
import debug # pyflakes:ignore
12+
1113
from ietf.utils.mail import send_mail, send_mail_text
1214
from ietf.ipr.utils import iprs_from_docs, related_docs
1315
from ietf.doc.models import WriteupDocEvent, LastCallDocEvent, DocAlias, ConsensusDocEvent
1416
from ietf.doc.utils import needed_ballot_positions, get_document_content
1517
from ietf.group.models import Role
1618
from ietf.doc.models import Document
1719
from ietf.mailtrigger.utils import gather_address_lists
20+
from ietf.utils import log
1821

1922
def email_state_changed(request, doc, text, mailtrigger_id=None):
2023
(to,cc) = gather_address_lists(mailtrigger_id or 'doc_state_edited',doc=doc)
@@ -515,7 +518,13 @@ def email_charter_internal_review(request, charter):
515518
os.path.join(settings.CHARTER_PATH,filename),
516519
split=False,
517520
markup=False,
518-
)
521+
).decode('utf-8')
522+
utext = charter.text_or_error() # pyflakes:ignore
523+
if charter_text and charter_text != utext and not 'Error; cannot read' in charter_text:
524+
debug.show('charter_text[:64]')
525+
debug.show('utext[:64]')
526+
log.assertion('charter_text == utext')
527+
519528
send_mail(request, addrs.to, settings.DEFAULT_FROM_EMAIL,
520529
'Internal %s Review: %s (%s)'%(charter.group.type.name,charter.group.name,charter.group.acronym),
521530
'doc/mail/charter_internal_review.txt',

ietf/doc/models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,9 @@ def text(self):
449449
#
450450
return text
451451

452+
def text_or_error(self):
453+
return self.text() or "Error; cannot read (%s)"%self.get_file_name()
454+
452455
def htmlized(self):
453456
name = self.get_base_name()
454457
text = self.text()

ietf/doc/templatetags/ietf_filters.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from ietf.doc.models import ConsensusDocEvent
1919
from ietf.doc.utils import get_document_content
2020
from ietf.utils.text import wordwrap, fill, wrap_text_if_unwrapped
21-
21+
from ietf.utils import log
2222

2323
register = template.Library()
2424

@@ -509,7 +509,13 @@ def document_content(doc):
509509
if doc is None:
510510
return None
511511
path = os.path.join(doc.get_file_path(),doc.filename_with_rev())
512-
return get_document_content(doc.name,path,markup=False)
512+
content = get_document_content(doc.name,path,markup=False)
513+
utext = doc.text_or_error() # pyflakes:ignore
514+
if content and content != utext and not 'Error; cannot read' in content:
515+
debug.show('content[:64]')
516+
debug.show('utext[:64]')
517+
log.assertion('content == utext')
518+
return content
513519

514520
@register.filter
515521
def format_timedelta(timedelta):

ietf/doc/utils.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from ietf.name.models import DocReminderTypeName, DocRelationshipName
2323
from ietf.group.models import Role
2424
from ietf.ietfauth.utils import has_role
25-
from ietf.utils import draft, markup_txt
25+
from ietf.utils import draft
2626
from ietf.utils.mail import send_mail
2727
from ietf.mailtrigger.utils import gather_address_lists
2828

@@ -299,17 +299,19 @@ def get_unicode_document_content(key, filename, codec='utf-8', errors='ignore'):
299299
return raw_content
300300

301301
def get_document_content(key, filename, split=True, markup=True):
302+
#log.unreachable("2017-12-05")
302303
try:
303304
with open(filename, 'rb') as f:
304305
raw_content = f.read()
305306
except IOError:
306307
error = "Error; cannot read ("+key+")"
307308
return error
308309

309-
if markup:
310-
return markup_txt.markup(raw_content, split)
311-
else:
312-
return raw_content
310+
# if markup:
311+
# return markup_txt.markup(raw_content, split)
312+
# else:
313+
# return raw_content
314+
return raw_content
313315

314316
def tags_suffix(tags):
315317
return (u"::" + u"::".join(t.name for t in tags)) if tags else u""

ietf/doc/views_conflict_review.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,12 @@ def edit_ad(request, name):
254254
def default_approval_text(review):
255255

256256
filename = "%s-%s.txt" % (review.canonical_name(), review.rev)
257-
current_text = get_document_content(filename, os.path.join(settings.CONFLICT_REVIEW_PATH, filename), split=False, markup=False)
257+
current_text = get_document_content(filename, os.path.join(settings.CONFLICT_REVIEW_PATH, filename), split=False, markup=False).decode('utf-8')
258+
utext = review.text_or_error() # pyflakes:ignore
259+
if current_text and current_text != utext and not 'Error; cannot read' in current_text:
260+
debug.show('current_text[:64]')
261+
debug.show('utext[:64]')
262+
log.assertion('current_text == utext')
258263

259264
conflictdoc = review.relateddocument_set.get(relationship__slug='conflrev').target.document
260265
if conflictdoc.stream_id=='ise':

ietf/doc/views_doc.py

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@
6666
from ietf.review.models import ReviewRequest
6767
from ietf.review.utils import can_request_review_of_doc, review_requests_to_list_for_docs
6868
from ietf.review.utils import no_review_from_teams_on_doc
69+
from ietf.utils import markup_txt, log
70+
from ietf.utils.text import maybe_split
6971

7072

7173
def render_document_top(request, doc, tab, name):
@@ -186,7 +188,13 @@ def document_main(request, name, rev=None):
186188
filename = name + ".txt"
187189

188190
content = get_document_content(filename, os.path.join(settings.RFC_PATH, filename),
189-
split_content, markup=True)
191+
split_content, markup=True).decode('utf-8')
192+
utext = doc.text_or_error() # pyflakes:ignore
193+
if content and content != utext and not 'Error; cannot read' in content:
194+
debug.show('content[:64]')
195+
debug.show('utext[:64]')
196+
log.assertion('content == utext')
197+
content = markup_txt.markup(maybe_split(content, split=split_content))
190198

191199
# file types
192200
base_path = os.path.join(settings.RFC_PATH, name + ".")
@@ -216,7 +224,13 @@ def document_main(request, name, rev=None):
216224
filename = "%s-%s.txt" % (draft_name, doc.rev)
217225

218226
content = get_document_content(filename, os.path.join(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR, filename),
219-
split_content, markup=True)
227+
split_content, markup=True).decode('utf-8')
228+
utext = doc.text_or_error() # pyflakes:ignore
229+
if content and content != utext and not 'Error; cannot read' in content:
230+
debug.show('content[:64]')
231+
debug.show('utext[:64]')
232+
log.assertion('content == utext')
233+
content = markup_txt.markup(maybe_split(content, split=split_content))
220234

221235
# file types
222236
base_path = os.path.join(settings.INTERNET_DRAFT_PATH, doc.name + "-" + doc.rev + ".")
@@ -439,7 +453,13 @@ def document_main(request, name, rev=None):
439453
if doc.type_id == "charter":
440454
filename = "%s-%s.txt" % (doc.canonical_name(), doc.rev)
441455

442-
content = get_document_content(filename, os.path.join(settings.CHARTER_PATH, filename), split=False, markup=True)
456+
content = get_document_content(filename, os.path.join(settings.CHARTER_PATH, filename), split=False, markup=True).decode('utf-8')
457+
utext = doc.text_or_error() # pyflakes:ignore
458+
if content and content != utext and not 'Error; cannot read' in content:
459+
debug.show('content[:64]')
460+
debug.show('utext[:64]')
461+
log.assertion('content == utext')
462+
content = markup_txt.markup(content)
443463

444464
ballot_summary = None
445465
if doc.get_state_slug() in ("intrev", "iesgrev"):
@@ -480,9 +500,15 @@ def document_main(request, name, rev=None):
480500

481501
if doc.rev == "00" and not os.path.isfile(pathname):
482502
# This could move to a template
483-
content = "A conflict review response has not yet been proposed."
503+
content = u"A conflict review response has not yet been proposed."
484504
else:
485-
content = get_document_content(filename, pathname, split=False, markup=True)
505+
content = get_document_content(filename, pathname, split=False, markup=True).decode('utf-8')
506+
utext = doc.text_or_error() # pyflakes:ignore
507+
if content and content != utext and not 'Error; cannot read' in content:
508+
debug.show('content[:64]')
509+
debug.show('utext[:64]')
510+
log.assertion('content == utext')
511+
content = markup_txt.markup(content)
486512

487513
ballot_summary = None
488514
if doc.get_state_slug() in ("iesgeval") and doc.active_ballot():
@@ -507,9 +533,14 @@ def document_main(request, name, rev=None):
507533

508534
if doc.rev == "00" and not os.path.isfile(pathname):
509535
# This could move to a template
510-
content = "Status change text has not yet been proposed."
536+
content = u"Status change text has not yet been proposed."
511537
else:
512-
content = get_document_content(filename, pathname, split=False)
538+
content = get_document_content(filename, pathname, split=False).decode('utf-8')
539+
utext = doc.text_or_error() # pyflakes:ignore
540+
if content and content != utext and not 'Error; cannot read' in content:
541+
debug.show('content[:64]')
542+
debug.show('utext[:64]')
543+
log.assertion('content == utext')
513544

514545
ballot_summary = None
515546
if doc.get_state_slug() in ("iesgeval"):
@@ -562,7 +593,12 @@ def document_main(request, name, rev=None):
562593
url = urlbase + extension
563594

564595
if extension == ".txt":
565-
content = get_document_content(basename, pathname + extension, split=False)
596+
content = get_document_content(basename, pathname + extension, split=False).decode('utf-8')
597+
utext = doc.text_or_error() # pyflakes:ignore
598+
if content != utext:
599+
debug.show('content[:64]')
600+
debug.show('utext[:64]')
601+
log.assertion('content == utext')
566602
t = "plain text"
567603

568604
other_types.append((t, url))

ietf/doc/views_status_change.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,12 @@ def newstatus(relateddoc):
282282
def default_approval_text(status_change,relateddoc):
283283

284284
filename = "%s-%s.txt" % (status_change.canonical_name(), status_change.rev)
285-
current_text = get_document_content(filename, os.path.join(settings.STATUS_CHANGE_PATH, filename), split=False, markup=False)
285+
current_text = get_document_content(filename, os.path.join(settings.STATUS_CHANGE_PATH, filename), split=False, markup=False).decode('utf-8')
286+
utext = status_change.text_or_error() # pyflakes:ignore
287+
if current_text and current_text != utext and not 'Error; cannot read' in current_text:
288+
debug.show('current_text[:64]')
289+
debug.show('utext[:64]')
290+
log.assertion('current_text == utext')
286291

287292
if relateddoc.target.document.std_level.slug in ('std','ps','ds','bcp',):
288293
action = "Protocol Action"

ietf/meeting/forms.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from ietf.message.models import Message
1919
from ietf.person.models import Person
2020
from ietf.utils.fields import DatepickerDateField, DurationField
21+
from ietf.utils import log
2122

2223
# need to insert empty option for use in ChoiceField
2324
# countries.insert(0, ('', '-'*9 ))
@@ -220,7 +221,14 @@ def __init__(self, *args, **kwargs):
220221
if self.instance.agenda():
221222
doc = self.instance.agenda()
222223
path = os.path.join(doc.get_file_path(), doc.filename_with_rev())
223-
self.initial['agenda'] = get_document_content(os.path.basename(path), path, markup=False)
224+
content = get_document_content(os.path.basename(path), path, markup=False).decode('utf-8')
225+
utext = doc.text_or_error() # pyflakes:ignore
226+
if content and content != utext and not 'Error; cannot read' in content:
227+
debug.show('content[:64]')
228+
debug.show('utext[:64]')
229+
log.assertion('content == utext')
230+
self.initial['agenda'] = content
231+
224232

225233
def clean_date(self):
226234
'''Date field validator. We can't use required on the input because

ietf/secr/telechat/views.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from django.shortcuts import render, get_object_or_404, redirect
77
from django.utils.functional import curry
88

9+
import debug # pyflakes:ignore
10+
911
from ietf.doc.models import DocEvent, Document, BallotDocEvent, BallotPositionDocEvent, BallotType, WriteupDocEvent
1012
from ietf.doc.utils import get_document_content, add_state_change_event
1113
from ietf.person.models import Person
@@ -15,7 +17,7 @@
1517
from ietf.iesg.agenda import agenda_data, get_doc_section
1618
from ietf.ietfauth.utils import role_required
1719
from ietf.secr.telechat.forms import BallotForm, ChangeStateForm, DateSelectForm, TELECHAT_TAGS
18-
20+
from ietf.utils import log
1921

2022

2123
'''
@@ -70,7 +72,12 @@ def get_doc_writeup(doc):
7072
writeup = latest.text
7173
elif doc.type_id == 'conflrev':
7274
path = os.path.join(doc.get_file_path(),doc.filename_with_rev())
73-
writeup = get_document_content(doc.name,path,split=False,markup=False)
75+
writeup = get_document_content(doc.name,path,split=False,markup=False).decode('utf-8')
76+
utext = doc.text_or_error() # pyflakes:ignore
77+
if writeup and writeup != utext and not 'Error; cannot read' in writeup:
78+
debug.show('writeup[:64]')
79+
debug.show('utext[:64]')
80+
log.assertion('writeup == utext')
7481
return writeup
7582

7683
def get_last_telechat_date():

ietf/utils/markup_txt.py

Lines changed: 24 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -30,26 +30,37 @@
3030
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
3131
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3232

33-
from django.utils.html import escape
34-
import string
3533
import re
34+
import six
35+
import string
3636

37+
from django.utils.html import escape
38+
39+
from ietf.utils import log
3740
from ietf.utils.text import wordwrap
3841

39-
def markup(content, split=True, width=None):
42+
def markup_ascii(content, width=None):
43+
log.unreachable('2017-12-08')
44+
if six.PY2:
45+
assert isinstance(content, basestring)
46+
# at this point, "content" is normal string
47+
# fix most common non-ASCII characters
48+
t1 = string.maketrans("\x91\x92\x93\x94\x95\x96\x97\xc6\xe8\xe9", "\'\'\"\"o--\'ee")
49+
# map everything except printable ASCII, TAB, LF, FF to "?"
50+
t2 = string.maketrans('','')
51+
t3 = "?"*9 + "\t\n?\f" + "?"*19 + t2[32:127] + "?"*129
52+
t4 = t1.translate(t3)
53+
content = content.translate(t4)
54+
else:
55+
log.assertion('six.PY2')
56+
return markup(content.decode('ascii'), width)
57+
58+
def markup(content, width=None):
59+
log.assertion('isinstance(content, six.text_type)')
4060
# normalize line endings to LF only
4161
content = content.replace("\r\n", "\n")
4262
content = content.replace("\r", "\n")
4363

44-
# at this point, "content" is normal string
45-
# fix most common non-ASCII characters
46-
t1 = string.maketrans("\x91\x92\x93\x94\x95\x96\x97\xc6\xe8\xe9", "\'\'\"\"o--\'ee")
47-
# map everything except printable ASCII, TAB, LF, FF to "?"
48-
t2 = string.maketrans('','')
49-
t3 = "?"*9 + "\t\n?\f" + "?"*19 + t2[32:127] + "?"*129
50-
t4 = t1.translate(t3)
51-
content = content.translate(t4)
52-
5364
# remove leading white space
5465
content = content.lstrip()
5566
# remove runs of blank lines
@@ -69,36 +80,4 @@ def markup(content, split=True, width=None):
6980

7081
content = re.sub("\n\n([0-9]+\\.|[A-Z]\\.[0-9]|Appendix|Status of|Abstract|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index)(.*)(?=\n\n)", """\n\n<span class="m_h">\g<1>\g<2></span>""", content)
7182

72-
if split:
73-
n = content.find("\n", 5000)
74-
content1 = "<pre>"+content[:n+1]+"</pre>\n"
75-
return content1
76-
#content2 = "<pre>"+content[n+1:]+"</pre>\n"
77-
#return (content1, content2)
78-
else:
79-
return "<pre>" + content + "</pre>\n"
80-
81-
def markup_unicode(content, split=True, width=None, container_classes=None):
82-
# normalize line endings to LF only
83-
content = content.replace("\r\n", "\n")
84-
content = content.replace("\r", "\n")
85-
86-
# remove leading white space
87-
content = content.lstrip()
88-
# remove runs of blank lines
89-
content = re.sub("\n\n\n+", "\n\n", content)
90-
91-
# maybe wordwrap. This must be done before the escaping below.
92-
if width:
93-
content = wordwrap(content, width)
94-
95-
# expand tabs + escape
96-
content_to_show = escape(content.expandtabs())
97-
98-
if split:
99-
n = content.find("\n", 5000)
100-
content_to_show = content_to_show[:n+1]
101-
102-
pre = '<pre class="%s" >' % container_classes if container_classes else '<pre>'
103-
104-
return pre+content_to_show+'</pre>\n'
83+
return "<pre>" + content + "</pre>\n"

0 commit comments

Comments
 (0)