Skip to content

Commit ac6a069

Browse files
committed
Merged in [14138] from rjsparks@nostrum.com:
Script to process the id-archive and add Document objects for drafts that are currently missing from the datatracker. Fixes ietf-tools#1316. Legacy-Id: 14142. Note: SVN reference [14138] has been migrated to Git commit 3a7d0d6.
2 parents 2c1438c + 3a7d0d6 commit ac6a069

1 file changed

Lines changed: 149 additions & 0 deletions

File tree

bin/add-old-drafts-from-archive.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
#!/usr/bin/env python
# Process the id-archive and add Document objects for drafts that are
# missing from the datatracker.

import datetime
import os
import sys
from pathlib2 import Path
from contextlib import closing

# The settings module has to be named before django.setup() is called.
os.environ["DJANGO_SETTINGS_MODULE"] = "ietf.settings"

import django
django.setup()

# These imports are deliberately placed after django.setup().
from django.conf import settings
from django.core.validators import validate_email, ValidationError
from ietf.utils.draft import Draft
from ietf.submit.utils import update_authors

import debug                            # pyflakes:ignore

from ietf.doc.models import Document, NewRevisionDocEvent, DocEvent, State
from ietf.person.models import Person

# Person recorded as the author ("by") of every event this script creates.
system = Person.objects.get(name="(System)")
# State assigned to every Document this script creates.
expired = State.objects.get(slug='expired', type='draft')
26+
27+
# Substrings that disqualify a draft name when they appear anywhere in it.
# NOTE(review): presumably these mark malformed archive file names - confirm.
EXCLUDED_FRAGMENTS = ('--', '.txt', '[', '=', '&')
# Name prefixes that are skipped outright.
EXCLUDED_PREFIXES = ('draft-draft-', 'draft-mlee-')
# Individual names that are skipped outright.
EXCLUDED_NAMES = ('draft-ietf-trade-iotp-v1_0-dsig',)


def draft_name_from_filename(filename):
    """Return the draft name encoded in an archive file name, or None.

    *filename* is a bare file name (no directory part).  It is accepted when
    the part after the last '-' is exactly six characters long and starts
    with two digits (the ``NN.txt`` of ``<name>-NN.txt``), and the part
    before that '-' is not excluded by EXCLUDED_FRAGMENTS, EXCLUDED_PREFIXES
    or EXCLUDED_NAMES.  The accepted name is everything before the last '-'.
    """
    name, _, tail = filename.rpartition('-')
    if len(tail) != 6 or not tail[:2].isdigit():
        return None
    if any(fragment in name for fragment in EXCLUDED_FRAGMENTS):
        return None
    if name.startswith(EXCLUDED_PREFIXES) or name in EXCLUDED_NAMES:
        return None
    return name


if __name__ == "__main__":
    # print() is always given one argument so the output is the same whether
    # print is a statement or a function.
    print('collecting draft names ...')
    # Distinct draft names found in the archive directory; consumed by the
    # later part of the script.
    names = set()
    for p in Path(settings.INTERNET_DRAFT_PATH).glob('draft*.txt'):
        # p.name is the final path component, independent of the separator.
        name = draft_name_from_filename(p.name)
        if name is not None:
            names.add(name)
45+
46+
def turn_into_unicode(s):
    """Return *s* as text; None becomes the empty string.

    Byte strings are decoded as UTF-8, falling back to latin-1 when that
    fails.  Anything that is not a byte string is returned unchanged.
    """
    if s is None:
        return u""
    if not isinstance(s, bytes):
        return s
    try:
        return s.decode("utf-8")
    except UnicodeDecodeError:
        # latin-1 maps every byte value, so this fallback cannot fail.
        return s.decode("latin-1")


def revision_from_filename(filename):
    """Return the first two characters after the last '-' in *filename*."""
    return filename.rsplit('-', 1)[-1][:2]


def new_revision_desc(name, rev):
    """Return the description text for the new-revision event of name-rev."""
    return "New version available: <b>%s-%s.txt</b>" % (name, rev)


def read_draft_text(path):
    """Read the file at *path* and decode it as UTF-8, else as latin-1."""
    # Binary mode: the content is decoded explicitly below.
    with open(path, 'rb') as txt_file:
        raw = txt_file.read()
    try:
        return raw.decode('utf8')
    except UnicodeDecodeError:
        return raw.decode('latin1')


def author_dicts(draft):
    """Build the list of author dicts from ``draft.get_author_list()``.

    Each entry of the author list is unpacked as an 8-tuple (full name,
    first name, middle initial, last name, suffix, email, country, company).
    The result has one dict per author with the keys "name", "email",
    "affiliation" and "country".  An email that fails validate_email is
    replaced by the empty string.
    """
    authors = []
    for author in draft.get_author_list():
        full_name, _first, _middle, _last, _suffix, email, country, company = author

        # Drop line breaks and angle brackets from the extracted name.
        author_name = full_name.replace("\n", "").replace("\r", "").replace("<", "").replace(">", "").strip()

        if email:
            try:
                validate_email(email)
            except ValidationError:
                email = ""

        authors.append({
            "name": turn_into_unicode(author_name),
            "email": turn_into_unicode(email),
            "affiliation": turn_into_unicode(company),
            "country": country,
        })
    return authors


class AuthorsOnlySubmission(object):
    """Minimal object carrying only the ``authors`` attribute.

    It is passed to update_authors in place of a submission object.
    """

    def __init__(self, authors):
        self.authors = authors


def add_missing_draft(name):
    """Create a Document for *name* from its ``<name>-??.txt`` archive files.

    The files are processed in sorted order.  A file that Draft() cannot
    parse, or whose parse reports errors other than only 'draftname', is
    reported on stdout and skipped.  The Document is created from the first
    usable file; every usable file then refreshes the authors and adds a
    new-revision event plus an explanatory comment event, both timestamped
    with the file's modification time.

    NOTE(review): title, abstract, pages, words and expires are only set
    when the Document is created, i.e. from the first usable revision;
    later revisions update only ``time`` and ``rev``.
    """
    archive = Path(settings.INTERNET_DRAFT_PATH)
    doc = None
    for p in sorted(archive.glob('%s-??.txt' % name)):
        rev = revision_from_filename(p.name)
        text = read_draft_text(str(p))
        try:
            draft = Draft(text, str(p), name_from_source=True)
        except Exception as e:
            # Best effort: report the unparsable file and move on.
            print("%s %s Can't parse %s : %s" % (name, rev, p, e))
            continue

        mtime = datetime.datetime.fromtimestamp(p.stat().st_mtime)
        # A parse whose only reported error is 'draftname' is still accepted.
        if draft.errors and list(draft.errors.keys()) != ['draftname']:
            print("Errors - could not process %s %s %s %s %s" % (
                name, rev, mtime, draft.errors, draft.get_title().encode('utf8')))
            continue

        if doc is None:
            doc = Document.objects.create(
                name=name,
                time=mtime,
                type_id='draft',
                title=draft.get_title(),
                abstract=draft.get_abstract(),
                rev=rev,
                pages=draft.get_pagecount(),
                words=draft.get_wordcount(),
                expires=mtime + datetime.timedelta(settings.INTERNET_DRAFT_DAYS_TO_EXPIRE),
            )
            doc.docalias_set.create(name=doc.name)
            doc.states.add(expired)

        # update authors
        update_authors(doc, AuthorsOnlySubmission(author_dicts(draft)))

        # add a docevent with words explaining where this came from
        events = [
            NewRevisionDocEvent.objects.create(
                type="new_revision",
                doc=doc,
                rev=rev,
                by=system,
                # Use the revision being added: doc.rev still holds the
                # previous revision until it is updated below.
                desc=new_revision_desc(doc.name, rev),
                time=mtime,
            ),
            DocEvent.objects.create(
                type="comment",
                doc=doc,
                rev=rev,
                by=system,
                desc="Revision added from id-archive on %s by %s" % (datetime.date.today(), sys.argv[0]),
                time=mtime,
            ),
        ]
        doc.time = mtime
        doc.rev = rev
        doc.save_with_history(events)
        print("Added %s %s" % (name, rev))


if __name__ == "__main__":
    # print() is always given one argument so the output is the same whether
    # print is a statement or a function.
    print('iterating through names ...')
    for name in sorted(names):
        # Only names the datatracker does not know about yet are processed.
        if not Document.objects.filter(name=name).exists():
            add_missing_draft(name)

0 commit comments

Comments
 (0)