forked from ietf-tools/datatracker
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathadd-old-drafts-from-archive.py
More file actions
executable file
·151 lines (135 loc) · 6.15 KB
/
add-old-drafts-from-archive.py
File metadata and controls
executable file
·151 lines (135 loc) · 6.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python
# Copyright The IETF Trust 2017-2019, All Rights Reserved
import datetime
import os
import sys
from pathlib import Path
from contextlib import closing
os.environ["DJANGO_SETTINGS_MODULE"] = "ietf.settings"
import django
django.setup()
from django.conf import settings
from django.core.validators import validate_email, ValidationError
from ietf.utils.draft import PlaintextDraft
from ietf.submit.utils import update_authors
from ietf.utils.timezone import date_today
import debug # pyflakes:ignore
from ietf.doc.models import Document, NewRevisionDocEvent, DocEvent, State
from ietf.person.models import Person
system = Person.objects.get(name="(System)")
expired = State.objects.get(type='draft',slug='expired')
names = set()
print 'collecting draft names ...'
versions = 0
for p in Path(settings.INTERNET_DRAFT_PATH).glob('draft*.txt'):
n = str(p).split('/')[-1].split('-')
if n[-1][:2].isdigit():
name = '-'.join(n[:-1])
if '--' in name or '.txt' in name or '[' in name or '=' in name or '&' in name:
continue
if name.startswith('draft-draft-'):
continue
if name == 'draft-ietf-trade-iotp-v1_0-dsig':
continue
if len(n[-1]) != 6:
continue
if name.startswith('draft-mlee-'):
continue
names.add('-'.join(n[:-1]))
count=0
print 'iterating through names ...'
for name in sorted(names):
if not Document.objects.filter(name=name).exists():
paths = list(Path(settings.INTERNET_DRAFT_PATH).glob('%s-??.txt'%name))
paths.sort()
doc = None
for p in paths:
n = str(p).split('/')[-1].split('-')
rev = n[-1][:2]
with open(str(p)) as txt_file:
raw = txt_file.read()
try:
text = raw.decode('utf8')
except UnicodeDecodeError:
text = raw.decode('latin1')
try:
draft = PlaintextDraft(text, txt_file.name, name_from_source=True)
except Exception as e:
print name, rev, "Can't parse", p,":",e
continue
if draft.errors and draft.errors.keys()!=['draftname',]:
print "Errors - could not process", name, rev, datetime.datetime.fromtimestamp(p.stat().st_mtime, datetime.timezone.utc), draft.errors, draft.get_title().encode('utf8')
else:
time = datetime.datetime.fromtimestamp(p.stat().st_mtime, datetime.timezone.utc)
if not doc:
doc = Document.objects.create(name=name,
time=time,
type_id='draft',
title=draft.get_title(),
abstract=draft.get_abstract(),
rev = rev,
pages=draft.get_pagecount(),
words=draft.get_wordcount(),
expires=time+datetime.timedelta(settings.INTERNET_DRAFT_DAYS_TO_EXPIRE),
)
DocAlias.objects.create(name=doc.name).docs.add(doc)
doc.states.add(expired)
# update authors
authors = []
for author in draft.get_author_list():
full_name, first_name, middle_initial, last_name, name_suffix, email, country, company = author
author_name = full_name.replace("\n", "").replace("\r", "").replace("<", "").replace(">", "").strip()
if email:
try:
validate_email(email)
except ValidationError:
email = ""
def turn_into_unicode(s):
if s is None:
return u""
if isinstance(s, unicode):
return s
else:
try:
return s.decode("utf-8")
except UnicodeDecodeError:
try:
return s.decode("latin-1")
except UnicodeDecodeError:
return ""
author_name = turn_into_unicode(author_name)
email = turn_into_unicode(email)
company = turn_into_unicode(company)
authors.append({
"name": author_name,
"email": email,
"affiliation": company,
"country": country
})
dummysubmission=type('', (), {})() #https://stackoverflow.com/questions/19476816/creating-an-empty-object-in-python
dummysubmission.authors = authors
update_authors(doc,dummysubmission)
# add a docevent with words explaining where this came from
events = []
e = NewRevisionDocEvent.objects.create(
type="new_revision",
doc=doc,
rev=rev,
by=system,
desc="New version available: <b>%s-%s.txt</b>" % (doc.name, doc.rev),
time=time,
)
events.append(e)
e = DocEvent.objects.create(
type="comment",
doc = doc,
rev = rev,
by = system,
desc = "Revision added from id-archive on %s by %s"%(date_today(),sys.argv[0]),
time=time,
)
events.append(e)
doc.time = time
doc.rev = rev
doc.save_with_history(events)
print "Added",name, rev