Skip to content

Commit ffb9eb1

Browse files
refactor: generate I-D bibxml files via celery (ietf-tools#7426)
* refactor: task to generate_draft_bibxml_files * test: test task/utility methods * chore: add periodic task * chore: remove generate_draft_bibxml_files.py * chore: further prune /bin/hourly
1 parent de8b3b5 commit ffb9eb1

7 files changed

Lines changed: 195 additions & 108 deletions

File tree

bin/hourly

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,33 +5,15 @@
55
# This script is expected to be triggered by cron from
66
# /etc/cron.d/datatracker
77
export LANG=en_US.UTF-8
8-
export PYTHONIOENCODING=utf-8
98

109
# Make sure we stop if something goes wrong:
1110
program=${0##*/}
1211
trap 'echo "$program($LINENO): Command failed with error code $? ([$$] $0 $*)"; exit 1' ERR
1312

14-
DTDIR=/a/www/ietf-datatracker/web
15-
cd $DTDIR/
16-
17-
# Set up the virtual environment
18-
source $DTDIR/env/bin/activate
19-
2013
logger -p user.info -t cron "Running $DTDIR/bin/hourly"
2114

22-
# Generate some static files
23-
ID=/a/ietfdata/doc/draft/repository
24-
DERIVED=/a/ietfdata/derived
25-
DOWNLOAD=/a/www/www6s/download
26-
2715
CHARTER=/a/www/ietf-ftp/charter
2816
wget -q https://datatracker.ietf.org/wg/1wg-charters-by-acronym.txt -O $CHARTER/1wg-charters-by-acronym.txt
2917
wget -q https://datatracker.ietf.org/wg/1wg-charters.txt -O $CHARTER/1wg-charters.txt
3018

31-
# Regenerate the last week of bibxml-ids
32-
$DTDIR/ietf/manage.py generate_draft_bibxml_files
33-
34-
# Create and update group wikis
35-
#$DTDIR/ietf/manage.py create_group_wikis
36-
3719
# exit 0

ietf/doc/management/commands/generate_draft_bibxml_files.py

Lines changed: 0 additions & 84 deletions
This file was deleted.

ietf/doc/tasks.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from pathlib import Path
1010

1111
from django.conf import settings
12+
from django.utils import timezone
1213

1314
from ietf.utils import log
1415
from ietf.utils.timezone import datetime_today
@@ -24,8 +25,13 @@
2425
send_expire_warning_for_draft,
2526
)
2627
from .lastcall import get_expired_last_calls, expire_last_call
27-
from .models import Document
28-
from .utils import generate_idnits2_rfc_status, generate_idnits2_rfcs_obsoleted
28+
from .models import Document, NewRevisionDocEvent
29+
from .utils import (
30+
generate_idnits2_rfc_status,
31+
generate_idnits2_rfcs_obsoleted,
32+
update_or_create_draft_bibxml_file,
33+
ensure_draft_bibxml_path_exists,
34+
)
2935

3036

3137
@shared_task
@@ -90,3 +96,24 @@ def generate_idnits2_rfcs_obsoleted_task():
9096
outpath.write_text(blob, encoding="utf8")
9197
except Exception as e:
9298
log.log(f"failed to write idnits2-rfcs-obsoleted: {e}")
99+
100+
101+
@shared_task
def generate_draft_bibxml_files_task(days=7, process_all=False):
    """Generate bibxml files for recently updated docs

    Iterates over "new_revision" events attached to drafts, oldest first, and
    calls update_or_create_draft_bibxml_file() once per (doc, rev) pair. An
    exception raised for one revision is logged and the loop moves on, so a
    single bad document does not stop the rest from being attempted.

    If process_all is False (the default), processes only docs with new revisions
    in the last specified number of days.

    :param days: size of the look-back window in days; ignored when process_all is True
    :param process_all: when True, process every matching event regardless of age
    """
    ensure_draft_bibxml_path_exists()
    doc_events = (
        NewRevisionDocEvent.objects.filter(
            type="new_revision",
            doc__type_id="draft",
        )
        # event.doc is read for every event below; pull it in with the same
        # query instead of issuing one additional query per event.
        .select_related("doc")
        .order_by("time")
    )
    if not process_all:
        doc_events = doc_events.filter(time__gte=timezone.now() - datetime.timedelta(days=days))
    for event in doc_events:
        try:
            update_or_create_draft_bibxml_file(event.doc, event.rev)
        except Exception as err:
            # Deliberately broad: this is a best-effort periodic job and the
            # remaining revisions should still be processed.
            log.log(f"Error generating bibxml for {event.doc.name}-{event.rev}: {err}")

ietf/doc/tests_tasks.py

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
11
# Copyright The IETF Trust 2024, All Rights Reserved
2+
import datetime
23
import mock
34

45
from pathlib import Path
56

67
from django.conf import settings
8+
from django.utils import timezone
79

810
from ietf.utils.test_utils import TestCase
911
from ietf.utils.timezone import datetime_today
1012

11-
from .factories import DocumentFactory
12-
from .models import Document
13+
from .factories import DocumentFactory, NewRevisionDocEventFactory
14+
from .models import Document, NewRevisionDocEvent
1315
from .tasks import (
1416
expire_ids_task,
1517
expire_last_calls_task,
18+
generate_draft_bibxml_files_task,
1619
generate_idnits2_rfcs_obsoleted_task,
1720
generate_idnits2_rfc_status_task,
1821
notify_expirations_task,
@@ -114,3 +117,86 @@ def test_generate_idnits2_rfcs_obsoleted_task(self, mock_generate):
114117
"dåtå".encode("utf8"),
115118
(Path(settings.DERIVED_DIR) / "idnits2-rfcs-obsoleted").read_bytes(),
116119
)
120+
121+
@mock.patch("ietf.doc.tasks.ensure_draft_bibxml_path_exists")
@mock.patch("ietf.doc.tasks.update_or_create_draft_bibxml_file")
def test_generate_draft_bibxml_files_task(self, mock_create, mock_ensure_path):
    """Check which (doc, rev) pairs the task hands to the bibxml writer.

    Both helpers the task calls are mocked, so only event selection (look-back
    window, event type, document type) and the keep-going-on-error behavior
    are exercised here.
    """
    now = timezone.now()
    very_old_event = NewRevisionDocEventFactory(
        time=now - datetime.timedelta(days=1000), rev="17"
    )
    old_event = NewRevisionDocEventFactory(
        time=now - datetime.timedelta(days=8), rev="03"
    )
    young_event = NewRevisionDocEventFactory(
        time=now - datetime.timedelta(days=6), rev="06"
    )
    # a couple that should always be ignored
    NewRevisionDocEventFactory(
        time=now - datetime.timedelta(days=6), rev="09", doc__type_id="rfc"  # not a draft
    )
    # This one stays on the factory's default document (a draft, like the
    # events above that are expected to match) so that only the event type
    # can exclude it. Also making it an rfc would let the doc-type filter hide
    # a broken event-type filter.
    NewRevisionDocEventFactory(
        type="changed_document",  # not a "new_revision" type
        time=now - datetime.timedelta(days=6),
        rev="09",
    )

    # Get rid of the "00" events created by the factories -- they're just noise for this test
    NewRevisionDocEvent.objects.filter(rev="00").delete()

    # default args - look back 7 days
    generate_draft_bibxml_files_task()
    self.assertTrue(mock_ensure_path.called)
    self.assertCountEqual(
        mock_create.call_args_list, [mock.call(young_event.doc, young_event.rev)]
    )
    mock_create.reset_mock()
    mock_ensure_path.reset_mock()

    # shorter lookback
    generate_draft_bibxml_files_task(days=5)
    self.assertTrue(mock_ensure_path.called)
    self.assertCountEqual(mock_create.call_args_list, [])
    mock_create.reset_mock()
    mock_ensure_path.reset_mock()

    # longer lookback
    generate_draft_bibxml_files_task(days=9)
    self.assertTrue(mock_ensure_path.called)
    self.assertCountEqual(
        mock_create.call_args_list,
        [
            mock.call(young_event.doc, young_event.rev),
            mock.call(old_event.doc, old_event.rev),
        ],
    )
    mock_create.reset_mock()
    mock_ensure_path.reset_mock()

    # everything
    generate_draft_bibxml_files_task(process_all=True)
    self.assertTrue(mock_ensure_path.called)
    self.assertCountEqual(
        mock_create.call_args_list,
        [
            mock.call(young_event.doc, young_event.rev),
            mock.call(old_event.doc, old_event.rev),
            mock.call(very_old_event.doc, very_old_event.rev),
        ],
    )
    mock_create.reset_mock()
    mock_ensure_path.reset_mock()

    # everything should still be tried, even if there's an exception
    mock_create.side_effect = RuntimeError
    generate_draft_bibxml_files_task(process_all=True)
    self.assertTrue(mock_ensure_path.called)
    self.assertCountEqual(
        mock_create.call_args_list,
        [
            mock.call(young_event.doc, young_event.rev),
            mock.call(old_event.doc, old_event.rev),
            mock.call(very_old_event.doc, very_old_event.rev),
        ],
    )

ietf/doc/tests_utils.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
import datetime
33
import debug # pyflakes:ignore
44

5-
from unittest.mock import patch
5+
from pathlib import Path
6+
from unittest.mock import call, patch
67

8+
from django.conf import settings
79
from django.db import IntegrityError
810
from django.test.utils import override_settings
911
from django.utils import timezone
@@ -16,7 +18,8 @@
1618
from ietf.doc.factories import DocumentFactory, WgRfcFactory, WgDraftFactory
1719
from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor
1820
from ietf.doc.utils import (update_action_holders, add_state_change_event, update_documentauthors,
19-
fuzzy_find_documents, rebuild_reference_relations, build_file_urls)
21+
fuzzy_find_documents, rebuild_reference_relations, build_file_urls,
22+
ensure_draft_bibxml_path_exists, update_or_create_draft_bibxml_file)
2023
from ietf.utils.draft import Draft, PlaintextDraft
2124
from ietf.utils.xmldraft import XMLDraft
2225

@@ -484,3 +487,49 @@ def test_xml_and_plaintext(self, mock_init, mock_get_refs, mock_plaintext_init):
484487
(self.updated.name, 'updates'),
485488
]
486489
)
490+
491+
492+
class DraftBibxmlTests(TestCase):
    """Tests for ensure_draft_bibxml_path_exists and update_or_create_draft_bibxml_file."""

    # NOTE(review): presumably makes the test framework point BIBXML_BASE_PATH
    # at a fresh temporary directory for each test -- confirm in ietf.utils.test_utils.
    settings_temp_path_overrides = TestCase.settings_temp_path_overrides + ["BIBXML_BASE_PATH"]

    # What the mocked "\r" / "\r\n" bibxml should look like once written to disk.
    NORMALIZED_BIBXML = "This\nis\nmy\nbibxml"

    @staticmethod
    def _bibxml_ids_dir():
        """Return the directory the bibxml reference files are expected in."""
        return Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"

    def test_ensure_draft_bibxml_path_exists(self):
        target_dir = self._bibxml_ids_dir()
        self.assertFalse(target_dir.exists())
        ensure_draft_bibxml_path_exists()
        # is_dir() is False both for a missing path and for a non-directory
        self.assertTrue(target_dir.is_dir())

    @patch("ietf.doc.utils.bibxml_for_draft", return_value="This\ris\nmy\r\nbibxml")
    def test_create_draft_bibxml_file(self, mock_bibxml):
        target_dir = self._bibxml_ids_dir()
        target_dir.mkdir(exist_ok=False)  # must not already exist: start from a clean slate

        doc = DocumentFactory()
        # the revision is made up; the test pretends the doc is at rev 26
        ref_file = target_dir / f"reference.I-D.{doc.name}-26.xml"

        update_or_create_draft_bibxml_file(doc, "26")
        mock_bibxml.assert_called_once_with(doc, "26")
        self.assertEqual(ref_file.read_text(), self.NORMALIZED_BIBXML)

    @patch("ietf.doc.utils.bibxml_for_draft", return_value="This\ris\nmy\r\nbibxml")
    def test_update_draft_bibxml_file(self, mock_bibxml):
        target_dir = self._bibxml_ids_dir()
        target_dir.mkdir(exist_ok=False)  # must not already exist: start from a clean slate

        doc = DocumentFactory()
        # the revision is made up; the test pretends the doc is at rev 26
        ref_file = target_dir / f"reference.I-D.{doc.name}-26.xml"
        ref_file.write_text("Old data")

        # stale contents get replaced
        update_or_create_draft_bibxml_file(doc, "26")
        mock_bibxml.assert_called_once_with(doc, "26")
        self.assertEqual(ref_file.read_text(), self.NORMALIZED_BIBXML)

        # contents differing only in leading/trailing whitespace are left alone:
        # the file must still hold the earlier text, not the padded one
        mock_bibxml.reset_mock()
        mock_bibxml.return_value = " \n This\nis\nmy\nbibxml "
        update_or_create_draft_bibxml_file(doc, "26")
        mock_bibxml.assert_called_once_with(doc, "26")
        self.assertEqual(ref_file.read_text(), self.NORMALIZED_BIBXML)

ietf/doc/utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1413,3 +1413,20 @@ def investigate_fragment(name_fragment):
14131413
unverifiable_collections=unverifiable_collections,
14141414
unexpected=unexpected,
14151415
)
1416+
1417+
1418+
def update_or_create_draft_bibxml_file(doc, rev):
    """Write the bibxml reference file for `doc` at `rev` unless it is already current.

    The text returned by bibxml_for_draft() has "\\r\\n" and lone "\\r" line
    endings converted to "\\n". It is then compared, ignoring leading and
    trailing whitespace, with whatever is stored in
    <BIBXML_BASE_PATH>/bibxml-ids/reference.I-D.<name>-<rev>.xml. The file is
    (re)written only when the two differ; a file that cannot be read is treated
    as empty.

    :param doc: document whose reference file is generated
    :param rev: revision string used in the file name and passed to bibxml_for_draft()
    """
    # NOTE(review): the expression is passed as a string, so it presumably gets
    # evaluated against this function's locals -- keep the name `doc`; confirm
    # in ietf.utils.log.
    log.assertion("doc.type_id == 'draft'")
    fresh_text = re.sub(r"\r\n?", r"\n", bibxml_for_draft(doc, rev))
    target = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids" / f"reference.I-D.{doc.name}-{rev}.xml"
    try:
        stored_text = target.read_text(encoding="utf8")
    except IOError:
        stored_text = ""
    if fresh_text.strip() == stored_text.strip():
        # Nothing but surrounding whitespace differs; leave the file untouched.
        return
    log.log(f"Writing {target}")
    target.write_text(fresh_text, encoding="utf8")
1429+
1430+
1431+
def ensure_draft_bibxml_path_exists():
    """Create the "bibxml-ids" directory under settings.BIBXML_BASE_PATH if it is absent.

    Only the final path component is created: parent directories are not, so
    BIBXML_BASE_PATH itself has to exist already. An existing directory is
    left as it is.
    """
    bibxml_ids_dir = Path(settings.BIBXML_BASE_PATH) / "bibxml-ids"
    bibxml_ids_dir.mkdir(exist_ok=True)

ietf/utils/management/commands/periodic_tasks.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,16 @@ def create_default_tasks(self):
221221
),
222222
)
223223

224+
PeriodicTask.objects.get_or_create(
225+
name="Generate I-D bibxml files",
226+
task="ietf.doc.tasks.generate_draft_bibxml_files_task",
227+
defaults=dict(
228+
enabled=False,
229+
crontab=self.crontabs["hourly"],
230+
description="Generate draft bibxml files for the last week's drafts",
231+
),
232+
)
233+
224234
def show_tasks(self):
225235
for label, crontab in self.crontabs.items():
226236
tasks = PeriodicTask.objects.filter(crontab=crontab).order_by(

0 commit comments

Comments
 (0)