@@ -68,3 +68,268 @@ def parse_critical_003_wg(self):
6868 self .parsed_info .add_error ('Invalid WG ID: %s' % group_acronym )
6969 else :
7070 self .parsed_info .metadraft .wg = IETFWG .objects .get (pk = NONE_WG_PK )
71+
def parse_critical_authors(self):
    """
    Extract the author list from the draft text and store it on the metadraft.

    Adapted from
    http://svn.tools.ietf.org/svn/tools/ietfdb/branch/idsubmit/ietf/utils/draft.py

    The draft is read from ``self.fd.file`` (rewound first).  Candidate names
    are collected from the first lines of the text: anything that ends a line,
    is preceded by at least six spaces and looks like a personal name.  Each
    candidate is then searched for again further down, scanning backwards from
    the end of the text, so that the e-mail address following the name can be
    attached.  Candidates that are never found again are dropped.

    On success the sorted list of ``"Name <email>"`` strings (just the name
    when no address was found) is assigned to
    ``self.parsed_info.metadraft.authors``; otherwise the message
    "Draft authors could not be found." is appended to
    ``self.parsed_info.errors``.

    Returns the list of authors, which may be empty.
    """

    def _stripheaders(rawlines):
        """
        Remove page headers/footers and collapse runs of blank lines.

        Returns ``(stripped, pages)``: ``stripped`` is the list of content
        lines, ``pages`` is a list with the text of each page of more than
        five lines.
        """
        stripped = []
        pages = []
        page = []
        newpage = False
        sentence = False
        haveblank = False

        # The page-state flag is passed in and returned explicitly: assigning
        # it inside a nested function would only create a local variable, and
        # the loop below would never see a page break.
        def endpage(pages, page, newpage, line):
            if line:
                page += [line]
            return begpage(pages, page, newpage)

        def begpage(pages, page, newpage, line=None):
            if page and len(page) > 5:
                pages += ["\n".join(page)]
                page = []
                newpage = True
            if line:
                page += [line]
            return pages, page, newpage

        for line in rawlines:
            line = line.rstrip()
            # Page footer: "[Page 12]"
            if re.search(r"\[?[Pp]age [0-9ivx]+\]?[ \t\f]*$", line, re.I):
                pages, page, newpage = endpage(pages, page, newpage, line)
                continue
            # Form feed
            if "\f" in line:
                pages, page, newpage = begpage(pages, page, newpage)
                continue
            # Page headers ending in a year
            if re.search(r"^ *Internet.Draft.+[12][0-9][0-9][0-9] *$", line, re.I):
                pages, page, newpage = begpage(pages, page, newpage, line)
                continue
            if re.search(r"^ *Draft.+[12][0-9][0-9][0-9] *$", line, re.I):
                pages, page, newpage = begpage(pages, page, newpage, line)
                continue
            if re.search(r"^RFC[ -]?[0-9]+.*( +)[12][0-9][0-9][0-9]$", line, re.I):
                pages, page, newpage = begpage(pages, page, newpage, line)
                continue
            # Footer starting with the draft name and ending in four digits
            if re.search(r"^draft-[-a-z0-9_.]+.*[0-9][0-9][0-9][0-9]$", line, re.I):
                pages, page, newpage = endpage(pages, page, newpage, line)
                continue
            # Long line ending in "Month Year"
            if re.search(r".{60,}(Jan|Feb|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|Sep|Oct|Nov|Dec) (19[89][0-9]|20[0-9][0-9]) *$", line, re.I):
                pages, page, newpage = begpage(pages, page, newpage, line)
                continue
            # A bare draft name right after a page break
            if newpage and re.search(r"^ *draft-[-a-z0-9_.]+ *$", line, re.I):
                pages, page, newpage = begpage(pages, page, newpage, line)
                continue
            if re.search(r"^[^ \t]+", line):
                sentence = True
            if re.search(r"[^ \t]", line):
                # First content line after a gap: emit at most one blank line.
                # Across a page break a blank is only emitted when the text
                # looks like the start of a new sentence or section.
                if newpage:
                    if sentence:
                        stripped += [""]
                else:
                    if haveblank:
                        stripped += [""]
                haveblank = False
                sentence = False
                newpage = False
            if re.search(r"[.:]$", line):
                sentence = True
            if re.search(r"^[ \t]*$", line):
                haveblank = True
                page += [line]
                continue
            page += [line]
            stripped += [line]
        pages, page, newpage = begpage(pages, page, newpage)
        return stripped, pages

    self.fd.file.seek(0)
    raw_lines = self.fd.file.read().split("\n")
    draft_lines, draft_pages = _stripheaders(raw_lines)

    # Short given names that may appear in long form in the address section.
    longform = {
        "Beth": "Elizabeth",
        "Bill": "William",
        "Bob": "Robert",
        "Dick": "Richard",
        "Fred": "Alfred",
        "Jerry": "Gerald",
        "Liz": "Elizabeth",
        "Lynn": "Carolyn",
        "Ned": "Edward",
        "Ted": "Edward",
    }
    aux = {
        "honor": r"(?:Dr\.?|Prof(?:\.?|essor)|Sir|Lady|Dame)",
        "prefix": r"([Dd]e|Hadi|van|van de|van der|Ver|von)",
        "suffix": r"(jr|II|2nd|III|3rd|IV|4th)",
        "first": r"([A-Z][-A-Za-z]*)((\.?[- ]{1,2}[A-Za-z]+)*)",
        "last": r"([-A-Za-z']{2,})",
    }
    # Shapes a name can take at the end of a first-page header line:
    # "First Last", "Last, First" and a bare "Last".
    authformats = [
        r" {6}(%(first)s[ \.]{1,3}((%(prefix)s )?%(last)s)( %(suffix)s)?)([, ]?(.+\.?|\(.+\.?|\)))?$" % aux,
        r" {6}(((%(prefix)s )?%(last)s)( %(suffix)s)?, %(first)s)([, ]([Ee]d\.?|\([Ee]d\.?\)))?$" % aux,
        r" {6}(%(last)s)$" % aux,
    ]

    authors = []
    companies = []

    # Collect first-page author information first
    have_blankline = False
    have_draftline = False
    prev_blankline = False
    for line in draft_lines[:15]:
        leading_space = len(line) - len(line.lstrip(" "))
        line_len = len(line.rstrip())
        trailing_space = 72 - line_len if line_len <= 72 else 0
        # Truncate long lines at the first space past column 80:
        trunc_space = line.find(" ", 80)
        if line_len > 80 and trunc_space > -1:
            line = line[:trunc_space]
        if line_len > 60:
            # Look for centered title, break if found:
            if leading_space > 5 and abs(leading_space - trailing_space) < 5:
                break
        # A line is tried against every format, so organisation names on the
        # header lines are collected as well; they are weeded out below.
        for authformat in authformats:
            match = re.search(authformat, line)
            if match:
                authors.append(match.group(1))
        if line.strip() == "":
            if prev_blankline:
                break
            have_blankline = True
            prev_blankline = True
        else:
            prev_blankline = False
        if "draft-" in line:
            have_draftline = True
        # The header block is over once a blank line and the draft name
        # have both been seen.
        if have_blankline and have_draftline:
            break

    found_pos = []
    # Column slice of the most recently located author; None means "no limit".
    beg = end = None
    for i, author in enumerate(authors):
        if author is None:
            continue
        if "," in author:
            last, first = author.split(",", 1)
            author = "%s %s" % (first.strip(), last.strip())
        if " " not in author:
            if "." in author:
                first, last = author.rsplit(".", 1)
                first += "."
            else:
                # Only a surname is known: accept any given name in front.
                author = "[A-Z].+ " + author
                first, last = author.rsplit(" ", 1)
        else:
            first, last = author.rsplit(" ", 1)

        start = 0
        col = None
        for variant in ("%s %s" % (first, last), "%s %s" % (last, first)):
            # Pattern for full author information search, based on first page author name:
            authpat = variant
            # Permit expansion of first name
            authpat = re.sub(r"\. ", ".* ", authpat)
            authpat = re.sub(r"\.$", ".*", authpat)
            # Permit insertion of middle name or initial.  The backslash is
            # doubled because re.sub() parses escapes in the replacement and
            # rejects an unknown one such as \S.
            authpat = re.sub(" ", r"\\S*( +[^ ]+)* +", authpat)
            # Permit expansion of double-name initials
            authpat = re.sub("-", ".*?-", authpat)
            # Some chinese names are shown with double-letter(latin) abbreviated given names, rather than
            # a single-letter(latin) abbreviation:
            authpat = re.sub(r"^([A-Z])[A-Z]+\.\*", r"\1[-\\w]+", authpat)
            authpat = r"^(?:%s ?)?(%s)( *\(.*\)|,( [A-Z][-A-Za-z0-9]*)?)?" % (aux["honor"], authpat)
            start = 0
            col = None

            # Find start of author info for this author (if any).
            # Scan from the end of the file, looking for a match to authpat
            located = False
            for j in range(len(draft_lines) - 1, 15, -1):
                text = draft_lines[j].strip()
                forms = [text] + [text.replace(short, full) for short, full in longform.items() if short in text]
                for form in forms:
                    if not re.search(authpat, form):
                        continue
                    start = j
                    # Author blocks may be laid out side by side; columns are
                    # separated by wide gaps, while a single space is just the
                    # gap between the words of a name.
                    columns = re.split(r"( {4,})", form)
                    # Find which column:
                    cols = [c for c in range(len(columns)) if re.search(authpat + r"$", columns[c].strip())]
                    if not cols:
                        continue
                    col = cols[0]
                    if (start, col) in found_pos:
                        # Already claimed by an earlier author; keep looking.
                        continue
                    found_pos.append((start, col))
                    beg = len("".join(columns[:col]))
                    if col == len(columns) or col == len(columns) - 1:
                        end = None
                    else:
                        end = beg + len("".join(columns[col:col + 2]))
                    name = re.search(authpat, columns[col].strip()).group(1)
                    authors[i] = None if name in companies else name
                    located = True
                    break
                if located:
                    break
            if start and col is not None:
                break
        if not authors[i]:
            continue

        if start and col is not None:
            count = 0
            blanklines = 0
            for line in draft_lines[start + 1:]:
                # Break on the third blank line
                if not line:
                    blanklines += 1
                    if blanklines >= 3:
                        break
                    continue
                count += 1
                authmatch = [
                    a for a in authors[i + 1:]
                    if a and a not in companies
                    and re.search(r"(^|\W)" + re.sub(r"\.? ", ".* ", a) + r"(\W|$)", line.strip())
                ]
                if authmatch:
                    if count == 1 or (count == 2 and not blanklines):
                        # First line after an author -- this is a company
                        companies += authmatch
                        companies += [line.strip()]  # XXX fix this for columnized author list
                        companies = list(set(companies))
                        for k in range(i + 1, len(authors)):
                            if authors[k] in companies:
                                authors[k] = None
                    elif "@" not in line:
                        # Reached the next author's name: this block is over.
                        break

                column = line[beg:end].strip()
                column = re.sub(r" *\(at\) *", "@", column)
                column = re.sub(r" *\(dot\) *", ".", column)

                emailmatch = re.search(r"[-A-Za-z0-9_.+]+@[-A-Za-z0-9_.]+", column)
                if emailmatch and "@" not in authors[i]:
                    email = emailmatch.group(0).lower()
                    authors[i] = "%s <%s>" % (authors[i], email)
        else:
            # Never seen again after the first page: not an author.
            authors[i] = None

    authors = [re.sub(r" +", " ", a) for a in authors if a is not None]
    if authors:
        authors.sort()
        self.parsed_info.metadraft.authors = authors
    else:
        # NOTE(review): the WG parser reports problems through
        # parsed_info.add_error(); confirm whether appending to .errors
        # directly is intended here.
        self.parsed_info.errors.append("Draft authors could not be found.")

    return authors
0 commit comments