Skip to content

Commit 3d1eb07

Browse files
committed
Port idindex to new schema, speed them up, add tests, refactor index
page in views_search to share code with the text index file, get rid of some special-case idindex filters from ietf_filters, move "/drafts/" redirects to a file in /doc/ - Legacy-Id: 5634
1 parent 4f7de7b commit 3d1eb07

20 files changed

Lines changed: 482 additions & 438 deletions

ietf/doc/redirect_drafts_urls.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright The IETF Trust 2007, All Rights Reserved
2+
3+
from django.conf import settings
4+
from django.conf.urls.defaults import patterns
5+
6+
7+
from django.http import HttpResponsePermanentRedirect
8+
from django.shortcuts import get_object_or_404
9+
10+
from ietf.group.models import Group
11+
12+
# Redirects from legacy draft URLs to their current locations under /doc/
# and /wg/. Patterns are tried in order, so the numeric-id pattern must
# stay ahead of the generic name pattern ([^/]+ also matches digits).
urlpatterns = patterns('',
    (r'^$', 'django.views.generic.simple.redirect_to', { 'url': '/doc/'}),
    (r'^all/$', 'django.views.generic.simple.redirect_to', { 'url': '/doc/all/'}),
    (r'^rfc/$', 'django.views.generic.simple.redirect_to', { 'url': '/doc/all/#rfc'}),
    (r'^dead/$', 'django.views.generic.simple.redirect_to', { 'url': '/doc/all/#expired'}),
    (r'^current/$', 'django.views.generic.simple.redirect_to', { 'url': '/doc/active/'}),
    # purely numeric ids are all sent to the /doc/ front page
    (r'^(?P<object_id>\d+)/(related/)?$', 'django.views.generic.simple.redirect_to', { 'url': '/doc/' }),
    (r'^(?P<name>[^/]+)/(related/)?$', 'django.views.generic.simple.redirect_to', { 'url': '/doc/%(name)s/' }),
    # numeric group id -> acronym based URL; 404 if the group does not exist
    (r'^wgid/(?P<id>\d+)/$', lambda request, id: HttpResponsePermanentRedirect("/wg/%s/" % get_object_or_404(Group, id=id).acronym)),
    (r'^wg/(?P<acronym>[^/]+)/$', 'django.views.generic.simple.redirect_to', { 'url': '/wg/%(acronym)s/' }),
    # the "." is escaped so that only a literal ".html" suffix matches
    (r'^all_id(?:_txt)?\.html$', 'django.views.generic.simple.redirect_to', { 'url': 'http://www.ietf.org/id/all_id.txt' }),
)

ietf/doc/tests.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import django.test
44
from django.core.urlresolvers import reverse as urlreverse
5-
from django.conf import settings
65

76
from pyquery import PyQuery
87

ietf/idindex/generate_all_id2_txt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,5 @@
3434
from django.core import management
3535
management.setup_environ(settings)
3636

37-
from ietf.idindex.views import all_id2_txt
37+
from ietf.idindex.index import all_id2_txt
3838
print all_id2_txt().encode('utf-8'),

ietf/idindex/generate_all_id_txt.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,5 @@
3434
from django.core import management
3535
management.setup_environ(settings)
3636

37-
from ietf.idindex.views import all_id_txt
38-
print all_id_txt(),
37+
from ietf.idindex.index import all_id_txt
38+
print all_id_txt().encode("utf-8"),

ietf/idindex/generate_id_abstracts_txt.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,5 @@
3434
from django.core import management
3535
management.setup_environ(settings)
3636

37-
from ietf.idindex.views import id_abstracts_txt
38-
x = id_abstracts_txt()
39-
if isinstance(x, unicode):
40-
print x.encode('utf-8'),
41-
else:
42-
print x,
37+
from ietf.idindex.index import id_index_txt
38+
print id_index_txt(with_abstracts=True).encode('utf-8'),

ietf/idindex/generate_id_index_txt.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,5 @@
3434
from django.core import management
3535
management.setup_environ(settings)
3636

37-
from ietf.idindex.views import id_index_txt
38-
x = id_index_txt()
39-
if isinstance(x, unicode):
40-
print x.encode('utf-8'),
41-
else:
42-
print x,
37+
from ietf.idindex.index import id_index_txt
38+
print id_index_txt().encode('utf-8'),

ietf/idindex/index.py

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
import datetime, os
2+
3+
import pytz
4+
5+
from django.conf import settings
6+
from django.template.loader import render_to_string
7+
8+
from ietf.idtracker.templatetags.ietf_filters import clean_whitespace
9+
from ietf.doc.models import *
10+
11+
def all_id_txt():
    """Return the Internet-Drafts status summary as one unicode string.

    The text starts with a heading, followed by one line per draft made of
    exactly four tab-separated fields: "name-rev", the date of the latest
    revision (or ""), a status description, and a last field that holds the
    RFC number for drafts in the "rfc" state (otherwise "").

    Drafts that are in a non-inactive IESG state are listed first; all
    remaining drafts follow, grouped by draft state in state order.
    """
    # this returns a lot of data so try to be efficient

    # precalculations: look-up tables consulted by draft name below

    # ordered by time so that the newest event of a draft ends up in the dict
    latest_rev_times = dict(
        NewRevisionDocEvent.objects.filter(type="new_revision", doc__name__startswith="draft-")
        .order_by('time')
        .values_list("doc_id", "time"))

    def rev_date(draft_name):
        when = latest_rev_times.get(draft_name)
        if not when:
            return ""
        return when.strftime("%Y-%m-%d")

    rfc_names = dict(
        DocAlias.objects.filter(
            name__startswith="rfc",
            document__states=State.objects.get(type="draft", slug="rfc"),
        ).values_list("document_id", "name"))

    replaced_by = dict(
        RelatedDocument.objects.filter(
            target__document__states=State.objects.get(type="draft", slug="repl"),
            relationship="replaces",
        ).values_list("target__document_id", "source"))

    # we need a distinct to prevent the queries below from multiplying the result
    all_ids = Document.objects.filter(type="draft").order_by('name').exclude(name__startswith="rfc").distinct()

    lines = ["\nInternet-Drafts Status Summary\n"]

    def emit(name_rev, date_text, status_text, last_text):
        # each line must have exactly 4 tab-separated fields
        lines.append("\t".join((name_rev, date_text, status_text, last_text)))

    inactive_iesg_slugs = ["pub", "watching", "dead"]

    rfc_state = State.objects.get(type="draft", slug="rfc")
    live_iesg_states = list(State.objects.filter(type="draft-iesg").exclude(slug__in=inactive_iesg_slugs))
    in_iesg_process = all_ids.exclude(states=rfc_state).filter(states__in=live_iesg_states).only("name", "rev")

    # handle those actively in the IESG process
    for draft in in_iesg_process:
        status = draft.get_state("draft-iesg").name
        substates = draft.tags.filter(slug__in=IESG_SUBSTATE_TAGS).values_list("name", flat=True)
        if substates:
            status = status + "::" + "::".join(substates)
        emit(
            draft.name + "-" + draft.rev,
            rev_date(draft.name),
            "In IESG processing - ID Tracker state <" + status + ">",
            "",
        )

    # handle the rest

    already_listed = [draft.name for draft in in_iesg_process]
    not_in_process = all_ids.exclude(pk__in=already_listed)

    for draft_state in State.objects.filter(type="draft").order_by("order"):
        for name, rev in not_in_process.filter(states=draft_state).values_list("name", "rev"):
            status = draft_state.name
            last_text = ""

            if draft_state.slug == "rfc":
                alias = rfc_names.get(name)
                if alias:
                    # strip the leading "rfc" to leave just the number
                    last_text = alias[3:]
            elif draft_state.slug == "repl":
                status += " replaced by " + replaced_by.get(name, "0")

            emit(name + "-" + rev, rev_date(name), status, last_text)

    return u"\n".join(lines) + "\n"
78+
79+
def file_types_for_drafts():
    """Look in the draft directory and return file types found as dict (name + rev -> [t1, t2, ...]).

    Only directory entries whose name starts with "draft-" and that have a
    file extension are considered. The key is the file name without its
    extension; each value lists the extensions (including the leading
    ".") found for that base name, in sorted file name order.
    """
    file_types = {}
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # extension lists, and any text joined from them, stable between runs
    for filename in sorted(os.listdir(settings.INTERNET_DRAFT_PATH)):
        if not filename.startswith("draft-"):
            continue

        base, ext = os.path.splitext(filename)
        if ext:
            file_types.setdefault(base, []).append(ext)

    return file_types
92+
93+
def all_id2_txt():
    """Return the rendered all_id2 listing of drafts.

    For every draft one line of 17 tab-separated fields (numbered 0-16 in
    the comments below) is built; the lines are joined with newlines and
    rendered through the "idindex/all_id2.txt" template as 'data'.
    """
    # this returns a lot of data so try to be efficient

    drafts = Document.objects.filter(type="draft").exclude(name__startswith="rfc").order_by('name').select_related(
        'group', 'group__parent', 'ad', 'ad__email', 'intended_std_level', 'shepherd', 'shepherd__email')

    rfc_names = dict(
        DocAlias.objects.filter(
            name__startswith="rfc",
            document__states=State.objects.get(type="draft", slug="rfc"),
        ).values_list("document_id", "name"))

    replaced_by = dict(
        RelatedDocument.objects.filter(
            target__document__states=State.objects.get(type="draft", slug="repl"),
            relationship="replaces",
        ).values_list("target__document_id", "source"))

    # ordered by time so that the newest event of a draft ends up in the dict
    latest_rev_times = dict(
        DocEvent.objects.filter(type="new_revision", doc__name__startswith="draft-")
        .order_by('time')
        .values_list("doc_id", "time"))

    draft_file_types = file_types_for_drafts()

    # draft name -> formatted author entries, in author order
    authors_by_draft = {}
    author_rows = DocumentAuthor.objects.filter(document__name__startswith="draft-").order_by("order").select_related("author", "author__person")
    for row in author_rows.iterator():
        entries = authors_by_draft.setdefault(row.document_id, [])
        if "@" in row.author.address:
            entries.append(u'%s <%s>' % (row.author.person.plain_name().replace("@", ""), row.author.address.replace(",", "")))
        else:
            entries.append(row.author.person.plain_name())

    lines = []
    for d in drafts:
        state_slug = d.get_state_slug()
        iesg_state = d.get_state("draft-iesg")

        # 0
        name_rev = d.name + "-" + d.rev

        # 1
        legacy_id = "-1" # used to be internal numeric identifier, we don't have that anymore

        # 2
        state_name = d.get_state().name if state_slug else ""

        # 3
        iesg_text = ""
        if state_slug == "active":
            iesg_text = "I-D Exists"
            if iesg_state:
                iesg_text = iesg_state.name
                substates = d.tags.filter(slug__in=IESG_SUBSTATE_TAGS).values_list("name", flat=True)
                if substates:
                    iesg_text = iesg_text + "::" + "::".join(substates)

        # 4
        rfc_number = ""
        if state_slug == "rfc":
            alias = rfc_names.get(d.name)
            if alias:
                # strip the leading "rfc" to leave just the number
                rfc_number = alias[3:]

        # 5
        replacement = replaced_by.get(d.name, "") if state_slug == "repl" else ""

        # 6
        rev_time = latest_rev_times.get(d.name)
        rev_date = rev_time.strftime("%Y-%m-%d") if rev_time else ""

        # 7 and 8
        group = d.group
        group_acronym = ""
        area = ""
        if group:
            if group.type_id != "area" and group.acronym != "none":
                group_acronym = group.acronym

            if group.type_id == "area":
                area = group.acronym
            elif group.type_id == "wg" and group.parent and group.parent.type_id == "area":
                area = group.parent.acronym

        # 9 responsible AD name
        ad_name = unicode(d.ad) if d.ad else ""

        # 10
        std_level = d.intended_std_level.name if d.intended_std_level else ""

        # 11
        lc_expires = ""
        if iesg_state and iesg_state.slug == "lc":
            last_call = d.latest_event(LastCallDocEvent, type="sent_last_call")
            if last_call:
                lc_expires = last_call.expires.strftime("%Y-%m-%d")

        # 12
        if state_slug == "active":
            exts = ",".join(draft_file_types.get(name_rev, ""))
        else:
            exts = ""

        # 13
        title = clean_whitespace(d.title) # FIXME: we should make sure this is okay in the database and in submit

        # 14
        author_text = u", ".join(authors_by_draft.get(d.name, []))

        # 15
        shepherd = d.shepherd.formatted_email().replace('"', '') if d.shepherd else ""

        # 16 Responsible AD name and email
        ad_email = d.ad.formatted_email().replace('"', '') if d.ad else ""

        lines.append(u"\t".join([
            name_rev,
            legacy_id,
            state_name,
            iesg_text,
            rfc_number,
            replacement,
            rev_date,
            group_acronym,
            area,
            ad_name,
            std_level,
            lc_expires,
            exts,
            title,
            author_text,
            shepherd,
            ad_email,
        ]))

    return render_to_string("idindex/all_id2.txt", {'data': u"\n".join(lines) })
196+
197+
def active_drafts_index_by_group(extra_values=()):
    """Return the groups having active drafts, for spitting out draft index.

    extra_values is a tuple of additional Document field names to extract
    for each draft on top of name, rev, title and group_id.

    Each returned group carries an "active_drafts" attribute: a list of
    dicts with the extracted values plus, where known, "rev_time",
    "initial_rev_time" and "authors". Groups are sorted by acronym and the
    drafts of each group by their initial revision time.
    """
    # this returns a lot of data so try to be efficient

    active_state = State.objects.get(type="draft", slug="active")

    groups_by_id = dict((g.id, g) for g in Group.objects.all())

    wanted_fields = ("name", "rev", "title", "group_id") + extra_values

    drafts_by_name = {}
    for row in Document.objects.filter(states=active_state).values(*wanted_fields):
        drafts_by_name[row["name"]] = row

    # add initial and latest revision time; events arrive newest first, so
    # the first one seen is the latest and the last one seen the initial
    rev_events = NewRevisionDocEvent.objects.filter(type="new_revision", doc__states=active_state).order_by('-time').values_list("time", "doc_id")
    for time, doc_id in rev_events:
        draft = drafts_by_name.get(doc_id)
        if not draft:
            continue
        draft.setdefault("rev_time", time)
        draft["initial_rev_time"] = time

    # add authors
    author_rows = DocumentAuthor.objects.filter(document__states=active_state).order_by("order").select_related("author__person")
    for row in author_rows:
        draft = drafts_by_name.get(row.document_id)
        if not draft:
            continue
        draft.setdefault("authors", []).append(unicode(row.author.person))

    # put docs into groups; drafts whose group is unknown are left out
    for draft in drafts_by_name.itervalues():
        group = groups_by_id.get(draft["group_id"])
        if group:
            if not hasattr(group, "active_drafts"):
                group.active_drafts = []
            group.active_drafts.append(draft)

    groups = sorted(
        (g for g in groups_by_id.itervalues() if hasattr(g, "active_drafts")),
        key=lambda g: g.acronym)

    # drafts without any revision event sort before all others
    fallback_time = datetime.datetime(1950, 1, 1)

    def initial_time(draft):
        return draft.get("initial_rev_time", fallback_time)

    for group in groups:
        group.active_drafts.sort(key=initial_time)

    return groups
247+
248+
def id_index_txt(with_abstracts=False):
    """Return the rendered text index of active drafts by group.

    With with_abstracts set, the "abstract" value is extracted for each
    draft as well and the flag is handed on to the template. The result
    comes from rendering the "idindex/id_index.txt" template.
    """
    extra_values = ("abstract",) if with_abstracts else ()
    groups = active_drafts_index_by_group(extra_values)

    file_types = file_types_for_drafts()
    optional_exts = (".ps", ".pdf")
    for group in groups:
        for draft in group.active_drafts:
            # we need to output a multiple extension thing: ".txt" is always
            # listed, ".ps" and ".pdf" only when such a file was found
            found = file_types.get(draft["name"] + "-" + draft["rev"], "")
            draft["exts"] = ",".join([".txt"] + [ext for ext in optional_exts if ext in found])

    context = {
        'groups': groups,
        'time': datetime.datetime.now(pytz.UTC).strftime("%Y-%m-%d %H:%M:%S %Z"),
        'with_abstracts': with_abstracts,
    }
    return render_to_string("idindex/id_index.txt", context)

ietf/idindex/models.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +0,0 @@
1-
# Copyright The IETF Trust 2007, All Rights Reserved

0 commit comments

Comments
 (0)