Skip to content

Commit 5ab9f02

Browse files
committed
Merged in changes from Yaco @2855.
- Legacy-Id: 2915
1 parent 2f1426e commit 5ab9f02

1 file changed

Lines changed: 111 additions & 14 deletions

File tree

ietf/utils/draft.py

Lines changed: 111 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
3232
"""
3333

34+
import datetime
3435
import getopt
3536
import os
3637
import os.path
@@ -124,9 +125,11 @@ def __init__(self, text):
124125
self.filename, self.revision = self._parse_draftname()
125126

126127
self._authors = None
128+
self._abstract = None
127129
self._pagecount = None
128130
self._status = None
129131
self._creation_date = None
132+
self._title = None
130133

131134
# ------------------------------------------------------------------
132135
def _parse_draftname(self):
@@ -241,7 +244,11 @@ def get_creation_date(self):
241244
if self._creation_date:
242245
return self._creation_date
243246
month_names = [ 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec' ]
244-
date_regexes = [
247+
date_regexes = [
248+
r'^(?P<month>\w+)\s+(?P<day>\d{1,2}),?\s+(?P<year>\d{4})',
249+
r'^(?P<day>\d{1,2}),?\s+(?P<month>\w+)\s+(?P<year>\d{4})',
250+
r'^(?P<day>\d{1,2})-(?P<month>\w+)-(?P<year>\d{4})',
251+
r'^(?P<month>\w+)\s+(?P<year>\d{4})',
245252
r'\s{3,}(?P<month>\w+)\s+(?P<day>\d{1,2}),?\s+(?P<year>\d{4})',
246253
r'\s{3,}(?P<day>\d{1,2}),?\s+(?P<month>\w+)\s+(?P<year>\d{4})',
247254
r'\s{3,}(?P<day>\d{1,2})-(?P<month>\w+)-(?P<year>\d{4})',
@@ -250,24 +257,87 @@ def get_creation_date(self):
250257
]
251258

252259
for regex in date_regexes:
253-
match = re.search(regex, self.pages[0])
260+
match = re.search(regex, self.pages[0], re.MULTILINE)
254261
if match:
255-
md = match.groupdict()
256-
mon = md['month'][0:3].lower()
257-
day = int( md.get( 'day', date.today().day ) )
258-
year = int( md['year'] )
259-
try:
260-
month = month_names.index( mon ) + 1
261-
self._creation_date = date(year, month, day)
262+
md = match.groupdict()
263+
mon = md['month'][0:3].lower()
264+
day = int( md.get( 'day', datetime.date.today().day ) )
265+
year = int( md['year'] )
266+
try:
267+
month = month_names.index( mon ) + 1
268+
self._creation_date = datetime.date(year, month, day)
262269
return self._creation_date
263-
except ValueError:
264-
# mon abbreviation not in _MONTH_NAMES
265-
# or month or day out of range
266-
pass
270+
except ValueError:
271+
# mon abbreviation not in _MONTH_NAMES
272+
# or month or day out of range
273+
pass
267274
self.errors['creation_date'] = 'Creation Date field is empty or the creation date is not in a proper format.'
268275
return self._creation_date
269276

270277

278+
# ------------------------------------------------------------------
279+
def get_abstract(self):
    """Extract the abstract from the draft text.

    Scans ``self.lines`` for the first line that starts (after optional
    whitespace, case-insensitively) with "abstract", then collects the
    following lines until one looks like the start of another section
    (see ``header_re`` below).  Blank lines directly after the heading are
    skipped.  The collected text is passed through ``_clean_abstract`` and
    ``_check_abstract_indent``; the result is cached in ``self._abstract``
    and returned.
    """
    if self._abstract:
        return self._abstract
    # Raw strings: '\s' in a plain string literal is an invalid escape.
    abstract_re = re.compile(r'^(\s*)abstract', re.I)
    header_re = re.compile(r"^(\s*)(1\.|A\.|Appendix|Status of|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index).*", re.I)
    started = False
    collected = []
    abstract_indent = 0
    for line in self.lines:
        if not started:
            heading = abstract_re.match(line)
            if heading:
                started = True
                # Width of the leading whitespace only; group(0) would
                # also count the word "abstract" itself.
                abstract_indent = len(heading.group(1))
            continue
        if header_re.match(line):
            # Next section heading reached: the abstract is over.
            break
        if not line and not collected:
            # Ignore blank lines between the heading and the first text.
            continue
        collected.append(line)
    abstract = '\n'.join(collected)
    abstract = self._clean_abstract(abstract)
    self._abstract = self._check_abstract_indent(abstract, abstract_indent)
    return self._abstract
302+
303+
304+
def _check_abstract_indent(self, abstract, indent):
    """Trim trailing material that does not share the abstract's indentation.

    The leading-whitespace width of every non-empty line is tallied.  If a
    single width accounts for more than 90% of the non-empty lines, the
    text is considered consistently formatted and is cut at the first
    non-empty line whose width accounts for less than 90%.  Otherwise the
    text is returned unchanged.

    ``indent`` is accepted for interface compatibility but is not used:
    the indentation is always derived from ``abstract`` itself.
    """
    # Raw string: '\s' in a plain string literal is an invalid escape.
    indentation_re = re.compile(r'^(\s)*')
    lines = abstract.split('\n')

    # Histogram of indentation widths over the non-empty lines, built in a
    # single pass instead of calling list.count() once per distinct width.
    counts = {}
    for line in lines:
        if line:
            width = len(indentation_re.match(line).group(0))
            counts[width] = counts.get(width, 0) + 1

    total = float(sum(counts.values()))
    percents = dict((width, count / total) for width, count in counts.items())
    if not any(share > 0.9 for share in percents.values()):
        # No dominant indentation: leave the text alone.
        return abstract

    kept = []
    for line in lines:
        if line:
            width = len(indentation_re.match(line).group(0))
            if percents[width] < 0.9:
                # First line off the dominant indentation ends the abstract.
                break
        # Blank lines are always kept; they carry no indentation signal.
        kept.append(line)
    return '\n'.join(kept)
329+
330+
331+
def _clean_abstract(self, text):
    """Strip trailing boilerplate from an abstract and wrap long lines.

    Everything from a 'The key words "MUST", "MUST NOT",' paragraph
    (optionally preceded by its section heading) to the end of the text is
    removed, as is everything from a "Status of this Memo" line onwards.
    Then, for as long as the text begins with a line of at least 72
    characters followed by a space, the last word of each such over-long
    line is pushed down onto the following line.
    """
    boilerplate_patterns = (
        # RFC 2119 requirements-language paragraph and all that follows it.
        "(?s)(Conventions [Uu]sed in this [Dd]ocument|Requirements [Ll]anguage)?[\n ]*The key words \"MUST\", \"MUST NOT\",.*$",
        # Get rid of status/copyright boilerplate
        "(?s)\nStatus of [tT]his Memo\n.*$",
    )
    for pattern in boilerplate_patterns:
        text = re.sub(pattern, "", text)

    # wrap long lines without messing up formatting of Ok paragraphs.
    # NOTE: the loop condition is anchored at the start of the text, so it
    # only looks at the first line; the substitution touches every line.
    overlong_start = re.compile("([^\n]{72,}?) +")
    move_last_word = re.compile("([^\n]{72,}?) +([^\n ]*)(\n|$)")
    while overlong_start.match(text):
        text = move_last_word.sub("\\1\n\\2 ", text)
    return text
339+
340+
271341
# ------------------------------------------------------------------
272342
def get_authors(self):
273343
"""Extract author information from draft text.
@@ -495,13 +565,35 @@ def get_authors(self):
495565
_debug("Not an author? '%s'" % (author))
496566

497567
authors = [ re.sub(r" +"," ", a) for a in authors if a != None ]
498-
authors.sort()
568+
# authors.sort()
499569
_debug(" * Final author list: " + ", ".join(authors))
500570
_debug("-"*72)
501571
self._authors = authors
502572

503573
return self._authors
504574

575+
# ------------------------------------------------------------------
576+
def get_title(self):
    """Extract the document title from the first page.

    Two layouts are tried in order on ``self.pages[0]``:

    1. one to three consecutive non-empty lines immediately followed by a
       line holding the draft name (``draft-...``, optionally preceded by
       ``<``);
    2. one or two non-empty lines immediately followed by a
       "Status of this Memo" heading (case-insensitive).

    The matched title lines are joined into a single line, cached in
    ``self._title`` and returned.  If neither layout matches, an entry is
    stored under ``self.errors["title"]`` and ``None`` is returned.
    """
    if self._title:
        return self._title
    title_patterns = (
        # The outer group captures *all* title lines; a bare repeated
        # group such as (.+\n){1,3} would only retain its last repetition.
        re.compile(r'((?:.+\n){1,3})(\s+<?draft-\S+\s*\n)'),
        # unusual title extract
        re.compile(r'(.+\n|.+\n.+\n)(\s*status of this memo\s*\n)', re.I),
    )
    for pattern in title_patterns:
        match = pattern.search(self.pages[0])
        if match:
            title = match.group(1).strip()
            # Fold a multi-line title (and its per-line indentation) into
            # one line so callers get a single-line value.
            title = re.sub(r'\s*\n\s*', ' ', title)
            self._title = title
            return self._title
    self.errors["title"] = "Could not find the title on the first page."
    return None
595+
596+
505597
# ----------------------------------------------------------------------
506598
def _output(fields):
507599
if opt_timestamp:
@@ -546,11 +638,16 @@ def _printmeta(timestamp, fn):
546638
fields["doctag"] = draft.filename or fn[:-7]
547639
fields["docrev"] = draft.revision
548640

641+
fields["doctitle"] = draft.get_title()
549642
fields["docpages"] = str(draft.get_pagecount())
550643
fields["docauthors"] = ", ".join(draft.get_authors())
644+
fields["doccreationdate"] = str(draft.get_creation_date())
551645
deststatus = draft.get_status()
552646
if deststatus:
553647
fields["docdeststatus"] = deststatus
648+
abstract = draft.get_abstract()
649+
if abstract:
650+
fields["docabstract"] = abstract
554651

555652
_output(fields)
556653

0 commit comments

Comments
 (0)