Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions ietf/sync/bibxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,25 @@ def get_fyi_bibxml(fyi_number):
return f"""<referencegroup anchor="FYI{fyi_number}" target="{fyi_link}">{rfc_bibxml}</referencegroup>"""


def get_id_bibxml(draft_name, doc):
"""Return BibXML entry for the given I-D doc"""
name = "-".join(draft_name.split("-", 2)[1:])
date = ""
if doc.is_dochistory():

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should implement DocHistory.pub_date() so that you don't have to branch here

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that'll make things easier. But this is roughly the same logic that is used elsewhere in the Datatracker (DT) to get revisions.

latest_event = doc.latest_event(type="new_revision", rev=doc.rev)
if latest_event:
doc.pub_date = latest_event.time
date = doc.pub_date.strftime('<date day="%-d" month="%B" year="%Y"/>')
Comment on lines +91 to +92

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recommend against pseudo-shadowing pub_date this way. It's not a true shadow since a DocHistory doesn't have a pub_date() method, but to a person reading the code it appears to be replacing the method.

I think it's just a temp variable and you don't actually need to attach it to the DocHistory.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, happy to improve. I was using the same logic as in

if doc.is_dochistory():
latest_event = doc.latest_event(type='new_revision', rev=rev)
if latest_event:
doc.pub_date = latest_event.time

IMO there should be an easier way to get the revisions.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect it's done there for use in a template (where {{ doc.pub_date }} will call the callable if necessary).

else:
date = doc.pub_date().strftime('<date day="%-d" month="%B" year="%Y"/>')
link = f"https://datatracker.ietf.org/doc/html/{draft_name}-{doc.rev}"
authors = ""
for author in doc.author_persons_or_names():
authors += f"""<author fullname={qa(author.person.name)} />"""

return f"""<reference anchor="I-D.{name}" target="{link}"><front><title>{doc.title}</title>{date}{authors}<abstract><t>{doc.abstract}</t></abstract></front><seriesInfo name="Internet-Draft" value="{draft_name}-{doc.rev}"/></reference>"""


def save_bibxml(bibxml, filename):
"""Prettify and save given BibXML"""

Expand Down Expand Up @@ -156,3 +175,31 @@ def recreate_rfcsubseries_bibxml():
filename = f"bibxml-rfcsubseries/fyi{fyi_number}.xml"
bibxml = get_fyi_bibxml(fyi_number)
save_bibxml(bibxml, filename)


def recreate_id_bibxml_by_draft_name(draft_name):
    """Create and save BibXML for the named draft and for each of its revisions.

    One revision-less reference file is saved for the current document, then
    one file per revision reported by ``doc.revisions_by_newrevisionevent()``,
    each built from the most recent history entry recorded for that revision.
    Revisions that have no matching history entry are skipped.

    Any exception raised by ``Document.objects.get`` (for example when no
    document is named ``draft_name``) propagates to the caller.
    """
    doc = Document.objects.get(name=draft_name)
    # Drop the first hyphen-separated component of the name,
    # e.g. "draft-ietf-foo-bar" -> "ietf-foo-bar".
    name = "-".join(draft_name.split("-", 2)[1:])

    # Revision-less BibXML, built from the current document
    bibxml = get_id_bibxml(draft_name, doc)
    filename = f"bibxml-ids/reference.I-D.{name}.xml"
    save_bibxml(bibxml, filename)

    # BibXML for each individual revision
    for revision in reversed(doc.revisions_by_newrevisionevent()):
        doc_rev = doc.history_set.order_by("-time").filter(rev=revision).first()
        if doc_rev is None:
            # .first() yields None when no history entry matches this
            # revision; get_id_bibxml cannot work with None, so skip the
            # revision instead of aborting the whole run.
            continue
        bibxml = get_id_bibxml(draft_name, doc_rev)
        filename = f"bibxml-ids/reference.I-D.{draft_name}-{revision}.xml"
        save_bibxml(bibxml, filename)


def recreate_id_bibxml():
"""Creates BibXML for all Internet Drafts."""

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we probably want to have a way to throttle this, run on subsets, etc. There are many drafts.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would imagine this to be a one-time thing, so I'm hoping we can run it manually as required, because it's resource-consuming.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, and that resource consumption is actually my worry. This is going to tie up a celery worker for the entire run, and if it fails part way through we have to start over.

Do you have a sense for how long it takes? If it's five minutes, then fine. If it's hours, then we want to be prepared to manage it in more detail.

for draft_name in (
Document.objects.filter(type_id="draft")
.values_list("name", flat=True)
.order_by("-time")
):
recreate_id_bibxml_by_draft_name(draft_name)
62 changes: 62 additions & 0 deletions ietf/sync/tests_bibxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@
FyiFactory,
PublishedRfcDocEventFactory,
StdFactory,
WgDraftFactory,
)
from ietf.sync.bibxml import (
get_bcp_bibxml,
get_fyi_bibxml,
get_id_bibxml,
get_rfc_bibxml,
get_std_bibxml,
recreate_id_bibxml,
recreate_id_bibxml_by_draft_name,
recreate_rfc_bibxml,
recreate_rfcsubseries_bibxml,
save_bibxml,
Expand Down Expand Up @@ -48,6 +52,9 @@ def setUp(self):
# Create a FYI with non-April Fools RFC
self.fyi = FyiFactory(contains=[self.rfc], name="fyi3")

# Create a draft with multiple revisions
self.draft = WgDraftFactory(create_revisions=(0, 1, 2))

def test_get_rfc_bibxml(self):
bibxml = get_rfc_bibxml(self.rfc.rfc_number)
self.assertIsNotNone(ElementTree.fromstring(bibxml))
Expand Down Expand Up @@ -93,6 +100,25 @@ def test_get_fyi_bibxml(self):
)
self.assertIn('<date month="April" year="2021"/>', bibxml)

def test_get_id_bibxml(self):
    """get_id_bibxml output parses as XML and names the draft and its revision."""
    draft_name = self.draft.name

    def check(xml, versioned_name):
        # Output must parse and mention both the bare and the versioned name
        self.assertIsNotNone(ElementTree.fromstring(xml))
        self.assertIn(draft_name, xml)
        self.assertIn(versioned_name, xml)

    # Current document (no history entry involved)
    check(get_id_bibxml(draft_name, self.draft), f"{draft_name}-02")

    # Every revision, using the most recent history entry for that revision
    for rev in self.draft.revisions_by_newrevisionevent():
        newest_first = self.draft.history_set.order_by("-time")
        snapshot = newest_first.filter(rev=rev).first()
        check(get_id_bibxml(draft_name, snapshot), f"{draft_name}-{rev}")

def test_save_to_bucket(self):
bibxml_bucket = storages["bibxml_bucket"]
with override_settings(BIBXML_DELETE_THEN_WRITE=False):
Expand Down Expand Up @@ -185,3 +211,39 @@ def test_recreate_rfcsubseries_bibxml(self, mock_save_bibxml):
call(ANY, fyi_filename),
]
)

@patch("ietf.sync.bibxml.save_bibxml")
def test_recreate_id_bibxml_by_draft_name(self, mock_save_bibxml):
    """Recreating one draft saves the revision-less file, then one file per revision."""
    draft_name = self.draft.name
    # Name without its first hyphen-separated component
    short_name = "-".join(draft_name.split("-", 2)[1:])

    recreate_id_bibxml_by_draft_name(draft_name)

    # Expected save order: revision-less file first, then the revisions
    # in reversed revisions_by_newrevisionevent() order.
    expected_calls = [call(ANY, f"bibxml-ids/reference.I-D.{short_name}.xml")]
    for rev in reversed(self.draft.revisions_by_newrevisionevent()):
        expected_calls.append(
            call(ANY, f"bibxml-ids/reference.I-D.{draft_name}-{rev}.xml")
        )
    mock_save_bibxml.assert_has_calls(expected_calls)

@patch("ietf.sync.bibxml.save_bibxml")
def test_recreate_id_bibxml(self, mock_save_bibxml):
    """A full recreate run saves every expected file for the fixture draft."""
    draft_name = self.draft.name
    # Name without its first hyphen-separated component
    short_name = "-".join(draft_name.split("-", 2)[1:])

    recreate_id_bibxml()

    # Revision-less file first, followed by one file per revision
    expected_calls = [call(ANY, f"bibxml-ids/reference.I-D.{short_name}.xml")]
    expected_calls.extend(
        call(ANY, f"bibxml-ids/reference.I-D.{draft_name}-{rev}.xml")
        for rev in reversed(self.draft.revisions_by_newrevisionevent())
    )
    mock_save_bibxml.assert_has_calls(expected_calls)
Loading