Skip to content

Commit b158512

Browse files
committed
Improve robustness of pdfization. Tune the test crawler. Commit ready for merge.
- Legacy-Id: 19813
1 parent 7d4ea4e commit b158512

3 files changed

Lines changed: 10 additions & 3 deletions

File tree

bin/test-crawl

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -232,6 +232,8 @@ def skip_url(url):
232232
# Skip most html conversions, not worth the time
233233
"^/doc/html/draft-[0-9ac-z]",
234234
"^/doc/html/draft-b[0-9b-z]",
235+
"^/doc/pdf/draft-[0-9ac-z]",
236+
"^/doc/pdf/draft-b[0-9b-z]",
235237
"^/doc/html/charter-.*",
236238
"^/doc/html/status-.*",
237239
"^/doc/html/rfc.*",

ietf/doc/models.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -577,7 +577,11 @@ def pdfized(self):
577577
pdf = None
578578
if not pdf:
579579
html = rfc2html.markup(text, path=settings.PDFIZER_URL_PREFIX)
580-
pdf = wpHTML(string=html).write_pdf(stylesheets=[io.BytesIO(b'html { font-size: 94%;}')])
580+
try:
581+
pdf = wpHTML(string=html.replace('\xad','')).write_pdf(stylesheets=[io.BytesIO(b'html { font-size: 94%;}')])
582+
except AssertionError:
583+
log.log(f'weasyprint failed with an assert on {self.name}')
584+
pdf = None
581585
if pdf:
582586
cache.set(cache_key, pdf, settings.PDFIZER_CACHE_TIME)
583587
return pdf

ietf/doc/utils.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1051,8 +1051,9 @@ def build_file_urls(doc):
10511051
label = "plain text" if t == "txt" else t
10521052
file_urls.append((label, base + doc.name + "-" + doc.rev + "." + t))
10531053

1054-
file_urls.append(("htmlized", urlreverse('ietf.doc.views_doc.document_html', kwargs=dict(name=doc.name, rev=doc.rev))))
1055-
file_urls.append(("pdfized", urlreverse('ietf.doc.views_doc.document_pdfized', kwargs=dict(name=doc.name, rev=doc.rev))))
1054+
if doc.text():
1055+
file_urls.append(("htmlized", urlreverse('ietf.doc.views_doc.document_html', kwargs=dict(name=doc.name, rev=doc.rev))))
1056+
file_urls.append(("pdfized", urlreverse('ietf.doc.views_doc.document_pdfized', kwargs=dict(name=doc.name, rev=doc.rev))))
10561057
file_urls.append(("bibtex", urlreverse('ietf.doc.views_doc.document_bibtex',kwargs=dict(name=doc.name,rev=doc.rev))))
10571058

10581059
return file_urls, found_types

0 commit comments

Comments
 (0)