1- import datetime , itertools , json , calendar
1+ import datetime
2+ import itertools
3+ import json
4+ import calendar
5+ import os
6+ import re
27from collections import defaultdict
38
49from django .shortcuts import render
712from django .http import HttpResponseRedirect , HttpResponseForbidden
813from django .db .models import Count
914from django .utils .safestring import mark_safe
15+ from django .conf import settings
1016
1117import dateutil .relativedelta
1218
1521 ReviewRequestData ,
1622 compute_review_request_stats ,
1723 sum_raw_review_request_aggregations )
24+ from ietf .submit .models import Submission
1825from ietf .group .models import Role , Group
1926from ietf .person .models import Person
2027from ietf .name .models import ReviewRequestStateName , ReviewResultName
21- from ietf .doc .models import Document
28+ from ietf .doc .models import DocAlias
2229from ietf .ietfauth .utils import has_role
2330
2431def stats_index (request ):
@@ -48,7 +55,6 @@ def generate_query_string(query_dict, overrides):
4855
4956 return query_part
5057
51-
5258def document_stats (request , stats_type = None , document_type = None ):
5359 def build_document_stats_url (stats_type_override = Ellipsis , document_type_override = Ellipsis , get_overrides = {}):
5460 kwargs = {
@@ -60,10 +66,11 @@ def build_document_stats_url(stats_type_override=Ellipsis, document_type_overrid
6066
6167 # statistics type - one of the tables or the chart
6268 possible_stats_types = [
63- ("authors" , "Number of authors " ),
69+ ("authors" , "Authors " ),
6470 ("pages" , "Pages" ),
65- # ("format", "Format"),
66- # ("spectech", "Specification techniques"),
71+ ("words" , "Words" ),
72+ ("format" , "Format" ),
73+ ("formlang" , "Formal languages" ),
6774 ]
6875
6976 possible_stats_types = [ (slug , label , build_document_stats_url (stats_type_override = slug ))
@@ -85,13 +92,34 @@ def build_document_stats_url(stats_type_override=Ellipsis, document_type_overrid
8592 return HttpResponseRedirect (build_document_stats_url (document_type_override = possible_document_types [0 ][0 ]))
8693
8794
def put_into_bin(value, bin_size):
    """Assign value to the fixed-width bin that contains it.

    Returns a (lower_bound, label) pair, where lower_bound is value
    floored to a multiple of bin_size and label is the inclusive range
    formatted as "<lower> - <upper>".  A value of None is passed through
    unchanged as (None, None).
    """
    if value is not None:
        lower = (value // bin_size) * bin_size
        upper = lower + bin_size - 1
        return (lower, "{} - {}".format(lower, upper))

    # no measurement available: keep None in both slots
    return (value, value)
101+
def generate_canonical_names(docalias_qs):
    """Yield one row per run of consecutive rows sharing the same first column.

    docalias_qs must provide order_by() and produce indexable rows whose
    first element is a string.  The rows are ordered by "document" and
    grouped on row[0].  Within each group the first row is the initial
    choice; a later row replaces it when its row[0] starts with "rfc",
    or starts with "draft" while the current choice does not start with
    "rfc".

    NOTE(review): the grouping key is row[0], not the document the rows
    were ordered by.  If row[0] is a unique alias name, every group holds
    a single row and the rfc/draft preference never applies - confirm
    whether the document id was meant to be the grouping key.
    """
    ordered_rows = docalias_qs.order_by("document")

    for _, group in itertools.groupby(ordered_rows, key=lambda row: row[0]):
        remaining = iter(group)
        # groupby never produces an empty group, so this is always a row
        best = next(remaining, None)

        for row in remaining:
            candidate = row[0]
            if candidate.startswith("rfc"):
                best = row
            elif candidate.startswith("draft") and not best[0].startswith("rfc"):
                best = row

        yield best
115+
88116 # filter documents
89- doc_qs = Document .objects .filter (type = "draft" )
117+ docalias_qs = DocAlias .objects .filter (document__type = "draft" )
90118
91119 if document_type == "rfc" :
92- doc_qs = doc_qs .filter (states__type = "draft" , states__slug = "rfc" )
120+ docalias_qs = docalias_qs .filter (document__states__type = "draft" , document__states__slug = "rfc" )
93121 elif document_type == "draft" :
94- doc_qs = doc_qs .exclude (states__type = "draft" , states__slug = "rfc" )
122+ docalias_qs = docalias_qs .exclude (document__states__type = "draft" , document__states__slug = "rfc" )
95123
96124 chart_data = []
97125 table_data = []
@@ -104,19 +132,20 @@ def build_document_stats_url(stats_type_override=Ellipsis, document_type_overrid
104132 doc_label = "draft"
105133
106134 stats_title = ""
135+ bin_size = 1
107136
108137 if stats_type == "authors" :
109138 stats_title = "Number of authors for each {}" .format (doc_label )
110139
111- groups = defaultdict (list )
140+ bins = defaultdict (list )
112141
113- for name , author_count in doc_qs .values_list ("name" ).annotate (Count ("authors " )). iterator ( ):
114- groups [author_count ].append (name )
142+ for name , author_count in generate_canonical_names ( docalias_qs .values_list ("name" ).annotate (Count ("document__authors " ))):
143+ bins [author_count ].append (name )
115144
116- total_docs = sum (len (names ) for author_count , names in groups .iteritems ())
145+ total_docs = sum (len (names ) for author_count , names in bins .iteritems ())
117146
118147 series_data = []
119- for author_count , names in sorted (groups .iteritems (), key = lambda t : t [0 ]):
148+ for author_count , names in sorted (bins .iteritems (), key = lambda t : t [0 ]):
120149 percentage = len (names ) * 100.0 / total_docs
121150 series_data .append ((author_count , percentage ))
122151 table_data .append ((author_count , percentage , names ))
@@ -129,15 +158,15 @@ def build_document_stats_url(stats_type_override=Ellipsis, document_type_overrid
129158 elif stats_type == "pages" :
130159 stats_title = "Number of pages for each {}" .format (doc_label )
131160
132- groups = defaultdict (list )
161+ bins = defaultdict (list )
133162
134- for name , pages in doc_qs .values_list ("name" , "pages" ):
135- groups [pages ].append (name )
163+ for name , pages in generate_canonical_names ( docalias_qs .values_list ("name" , "document__pages" ) ):
164+ bins [pages ].append (name )
136165
137- total_docs = sum (len (names ) for pages , names in groups .iteritems ())
166+ total_docs = sum (len (names ) for pages , names in bins .iteritems ())
138167
139168 series_data = []
140- for pages , names in sorted (groups .iteritems (), key = lambda t : t [0 ]):
169+ for pages , names in sorted (bins .iteritems (), key = lambda t : t [0 ]):
141170 percentage = len (names ) * 100.0 / total_docs
142171 if pages is not None :
143172 series_data .append ((pages , len (names )))
@@ -148,7 +177,86 @@ def build_document_stats_url(stats_type_override=Ellipsis, document_type_overrid
148177 "animation" : False ,
149178 })
150179
180+ elif stats_type == "words" :
181+ stats_title = "Number of words for each {}" .format (doc_label )
182+
183+ bin_size = 500
184+
185+ bins = defaultdict (list )
186+
187+ for name , words in generate_canonical_names (docalias_qs .values_list ("name" , "document__words" )):
188+ bins [put_into_bin (words , bin_size )].append (name )
189+
190+ total_docs = sum (len (names ) for words , names in bins .iteritems ())
191+
192+ series_data = []
193+ for (value , words ), names in sorted (bins .iteritems (), key = lambda t : t [0 ][0 ]):
194+ percentage = len (names ) * 100.0 / total_docs
195+ if words is not None :
196+ series_data .append ((value , len (names )))
197+
198+ table_data .append ((words , percentage , names ))
199+
200+ chart_data .append ({
201+ "data" : series_data ,
202+ "animation" : False ,
203+ })
204+
205+ elif stats_type == "format" :
206+ stats_title = "Formats for each {}" .format (doc_label )
207+
208+ bins = defaultdict (list )
209+
210+ # on new documents, we should have a Submission row with the file types
211+ submission_types = {}
151212
213+ for doc_name , file_types in Submission .objects .values_list ("draft" , "file_types" ).order_by ("submission_date" , "id" ):
214+ submission_types [doc_name ] = file_types
215+
216+ doc_names_with_missing_types = {}
217+ for canonical_name , rev , doc_name in generate_canonical_names (docalias_qs .values_list ("name" , "document__rev" , "document__name" )):
218+ types = submission_types .get (doc_name )
219+ if types :
220+ for dot_ext in types .split ("," ):
221+ bins [dot_ext .lstrip ("." ).upper ()].append (canonical_name )
222+
223+ else :
224+
225+ if canonical_name .startswith ("rfc" ):
226+ filename = canonical_name
227+ else :
228+ filename = canonical_name + "-" + rev
229+
230+ doc_names_with_missing_types [filename ] = canonical_name
231+
232+ # look up the remaining documents on disk
233+ for filename in itertools .chain (os .listdir (settings .INTERNET_ALL_DRAFTS_ARCHIVE_DIR ), os .listdir (settings .RFC_PATH )):
234+ t = filename .split ("." , 1 )
235+ if len (t ) != 2 :
236+ continue
237+
238+ basename , ext = t
239+ if any (ext .lower ().endswith (blacklisted_ext .lower ()) for blacklisted_ext in settings .DOCUMENT_FORMAT_BLACKLIST ):
240+ continue
241+
242+ canonical_name = doc_names_with_missing_types .get (basename )
243+
244+ if canonical_name :
245+ bins [ext .upper ()].append (canonical_name )
246+
247+ total_docs = sum (len (names ) for fmt , names in bins .iteritems ())
248+
249+ series_data = []
250+ for fmt , names in sorted (bins .iteritems (), key = lambda t : t [0 ]):
251+ percentage = len (names ) * 100.0 / total_docs
252+ series_data .append ((fmt , len (names )))
253+
254+ table_data .append ((fmt , percentage , names ))
255+
256+ chart_data .append ({
257+ "data" : series_data ,
258+ "animation" : False ,
259+ })
152260
153261 return render (request , "stats/document_stats.html" , {
154262 "chart_data" : mark_safe (json .dumps (chart_data )),
@@ -159,6 +267,8 @@ def build_document_stats_url(stats_type_override=Ellipsis, document_type_overrid
159267 "possible_document_types" : possible_document_types ,
160268 "document_type" : document_type ,
161269 "doc_label" : doc_label ,
270+ "bin_size" : bin_size ,
271+ "content_template" : "stats/document_stats_{}.html" .format (stats_type ),
162272 })
163273
164274@login_required
0 commit comments