3131
3232"""
3333
34+ import datetime
3435import getopt
3536import os
3637import os .path
@@ -124,9 +125,11 @@ def __init__(self, text):
124125 self .filename , self .revision = self ._parse_draftname ()
125126
126127 self ._authors = None
128+ self ._abstract = None
127129 self ._pagecount = None
128130 self ._status = None
129131 self ._creation_date = None
132+ self ._title = None
130133
131134 # ------------------------------------------------------------------
132135 def _parse_draftname (self ):
@@ -241,7 +244,11 @@ def get_creation_date(self):
241244 if self ._creation_date :
242245 return self ._creation_date
243246 month_names = [ 'jan' , 'feb' , 'mar' , 'apr' , 'may' , 'jun' , 'jul' , 'aug' , 'sep' , 'oct' , 'nov' , 'dec' ]
244- date_regexes = [
247+ date_regexes = [
248+ r'^(?P<month>\w+)\s+(?P<day>\d{1,2}),?\s+(?P<year>\d{4})' ,
249+ r'^(?P<day>\d{1,2}),?\s+(?P<month>\w+)\s+(?P<year>\d{4})' ,
250+ r'^(?P<day>\d{1,2})-(?P<month>\w+)-(?P<year>\d{4})' ,
251+ r'^(?P<month>\w+)\s+(?P<year>\d{4})' ,
245252 r'\s{3,}(?P<month>\w+)\s+(?P<day>\d{1,2}),?\s+(?P<year>\d{4})' ,
246253 r'\s{3,}(?P<day>\d{1,2}),?\s+(?P<month>\w+)\s+(?P<year>\d{4})' ,
247254 r'\s{3,}(?P<day>\d{1,2})-(?P<month>\w+)-(?P<year>\d{4})' ,
@@ -250,24 +257,87 @@ def get_creation_date(self):
250257 ]
251258
252259 for regex in date_regexes :
253- match = re .search (regex , self .pages [0 ])
260+ match = re .search (regex , self .pages [0 ], re . MULTILINE )
254261 if match :
255- md = match .groupdict ()
256- mon = md ['month' ][0 :3 ].lower ()
257- day = int ( md .get ( 'day' , date .today ().day ) )
258- year = int ( md ['year' ] )
259- try :
260- month = month_names .index ( mon ) + 1
261- self ._creation_date = date (year , month , day )
262+ md = match .groupdict ()
263+ mon = md ['month' ][0 :3 ].lower ()
264+ day = int ( md .get ( 'day' , datetime . date .today ().day ) )
265+ year = int ( md ['year' ] )
266+ try :
267+ month = month_names .index ( mon ) + 1
268+ self ._creation_date = datetime . date (year , month , day )
262269 return self ._creation_date
263- except ValueError :
264- # mon abbreviation not in _MONTH_NAMES
265- # or month or day out of range
266- pass
270+ except ValueError :
271+ # mon abbreviation not in _MONTH_NAMES
272+ # or month or day out of range
273+ pass
267274 self .errors ['creation_date' ] = 'Creation Date field is empty or the creation date is not in a proper format.'
268275 return self ._creation_date
269276
270277
278+ # ------------------------------------------------------------------
def get_abstract(self):
    """Return the abstract section of the draft as a single string.

    Scans ``self.lines`` for the first line that starts (ignoring leading
    whitespace and case) with "abstract", then collects the following
    lines until a line that looks like the next section heading.  The
    collected text is passed through ``_clean_abstract`` and
    ``_check_abstract_indent`` and the result is cached in
    ``self._abstract``.

    Assumes ``self.lines`` holds the draft text one line per item without
    trailing newlines -- TODO confirm against the constructor.

    Returns:
        The abstract text; an empty string when no abstract heading is
        found.
    """
    if self._abstract:
        return self._abstract
    abstract_re = re.compile(r'^(\s*)abstract', re.I)
    header_re = re.compile(r"^(\s*)(1\.|A\.|Appendix|Status of|Table of|Full Copyright|Copyright|Intellectual Property|Acknowled|Author|Index).*", re.I)
    begin = False
    abstract = []
    abstract_indent = 0
    for line in self.lines:
        if not begin:
            heading = abstract_re.match(line)
            if heading:
                begin = True
                # Length of the whole heading match: the leading
                # whitespace plus the word "abstract" itself.
                abstract_indent = len(heading.group(0))
            continue
        if header_re.match(line):
            # Next section heading reached: the abstract is complete.
            break
        if not line and not abstract:
            # Skip blank lines between the heading and the first text line.
            continue
        abstract.append(line)
    abstract = '\n'.join(abstract)
    abstract = self._clean_abstract(abstract)
    self._abstract = self._check_abstract_indent(abstract, abstract_indent)
    return self._abstract


def _check_abstract_indent(self, abstract, indent):
    """Cut the abstract at the first line that leaves the dominant indent.

    If more than 90% of the non-empty lines share one indentation width,
    the text is considered consistently formatted and is truncated just
    before the first non-empty line whose indentation width accounts for
    less than 90% of the lines.  Otherwise the text is returned unchanged.

    Args:
        abstract: Abstract text with lines separated by newline characters.
        indent: Accepted for interface compatibility; the value is not
            used by the computation.

    Returns:
        The possibly truncated abstract text.
    """
    indentation_re = re.compile(r'^(\s)*')
    lines = abstract.split('\n')
    widths = [len(indentation_re.match(line).group(0))
              for line in lines if line]
    if not widths:
        return abstract

    # Tally each indentation width once instead of re-counting the list
    # for every distinct width.
    counts = {}
    for width in widths:
        counts[width] = counts.get(width, 0) + 1
    total = float(len(widths))
    percents = dict((width, count / total) for width, count in counts.items())

    if not any(share > 0.9 for share in percents.values()):
        return abstract

    new_abstract = []
    for line in lines:
        if line:
            width = len(indentation_re.match(line).group(0))
            if percents[width] < 0.9:
                break
        # Blank lines are kept as they are.
        new_abstract.append(line)
    return '\n'.join(new_abstract)


def _clean_abstract(self, text):
    """Strip boilerplate from the abstract text and wrap a long first line.

    Removes everything from the RFC 2119 "key words" paragraph (and an
    optional preceding section title) to the end of the text, removes
    everything from a "Status of this Memo" line to the end of the text,
    and re-wraps lines of 72 or more characters.

    Args:
        text: Raw abstract text with lines separated by newline characters.

    Returns:
        The cleaned text.
    """
    text = re.sub("(?s)(Conventions [Uu]sed in this [Dd]ocument|Requirements [Ll]anguage)?[\n ]*The key words \"MUST\", \"MUST NOT\",.*$", "", text)
    # Get rid of status/copyright boilerplate
    text = re.sub("(?s)\nStatus of [tT]his Memo\n.*$", "", text)
    # wrap long lines without messing up formatting of Ok paragraphs:
    # NOTE(review): re.match only inspects the start of the text, so the
    # loop is driven by the first line alone -- confirm this is intended.
    while re.match("([^\n]{72,}?) +", text):
        text = re.sub("([^\n]{72,}?) +([^\n]*)(\n|$)", "\\1\n\\2 ", text)
    return text
339+
340+
271341 # ------------------------------------------------------------------
272342 def get_authors (self ):
273343 """Extract author information from draft text.
@@ -495,13 +565,35 @@ def get_authors(self):
495565 _debug ("Not an author? '%s'" % (author ))
496566
497567 authors = [ re .sub (r" +" ," " , a ) for a in authors if a != None ]
498- authors .sort ()
568+ # authors.sort()
499569 _debug (" * Final author list: " + ", " .join (authors ))
500570 _debug ("-" * 72 )
501571 self ._authors = authors
502572
503573 return self ._authors
504574
575+ # ------------------------------------------------------------------
def get_title(self):
    """Return the document title found on the first page.

    Two layouts are tried in order on ``self.pages[0]``:

    1. one to three consecutive non-empty lines immediately followed by a
       line holding the draft name (``draft-...``, optionally preceded
       by ``<``);
    2. one or two lines immediately followed by a "Status of this Memo"
       line (case-insensitive).

    The matched title lines are stripped of surrounding whitespace and
    cached in ``self._title``.  When neither layout matches, an entry is
    recorded under ``self.errors["title"]``.

    Returns:
        The title string, or None when no title could be located.
    """
    if self._title:
        return self._title
    first_page = self.pages[0]
    title_patterns = (
        # The repetition sits inside the capturing group so that every
        # title line is captured; a repeated capturing group would keep
        # only its last repetition.
        re.compile(r'((?:.+\n){1,3})(\s+<?draft-\S+\s*\n)'),
        # unusual title extract
        re.compile(r'(.+\n|.+\n.+\n)(\s*status of this memo\s*\n)', re.I),
    )
    for pattern in title_patterns:
        match = pattern.search(first_page)
        if match:
            self._title = match.group(1).strip()
            return self._title
    self.errors["title"] = "Could not find the title on the first page."
    return None
595+
596+
505597# ----------------------------------------------------------------------
506598def _output (fields ):
507599 if opt_timestamp :
@@ -546,11 +638,16 @@ def _printmeta(timestamp, fn):
546638 fields ["doctag" ] = draft .filename or fn [:- 7 ]
547639 fields ["docrev" ] = draft .revision
548640
641+ fields ["doctitle" ] = draft .get_title ()
549642 fields ["docpages" ] = str (draft .get_pagecount ())
550643 fields ["docauthors" ] = ", " .join (draft .get_authors ())
644+ fields ["doccreationdate" ] = str (draft .get_creation_date ())
551645 deststatus = draft .get_status ()
552646 if deststatus :
553647 fields ["docdeststatus" ] = deststatus
648+ abstract = draft .get_abstract ()
649+ if abstract :
650+ fields ["docabstract" ] = abstract
554651
555652 _output (fields )
556653
0 commit comments