From 3b0869caee9c60cb4c60dd254c0e01ae87cbf4fb Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Fri, 29 May 2026 15:52:42 -0500 Subject: [PATCH 1/2] feat: update rfc json --- ietf/api/tests_views_rpc.py | 70 +++++++ ietf/api/views_rpc.py | 11 +- ietf/doc/tests_utils_rfc_json.py | 333 +++++++++++++++++++++++++++++++ ietf/doc/utils_rfc_json.py | 197 ++++++++++++++++++ ietf/sync/errata.py | 7 +- ietf/sync/tasks.py | 17 +- ietf/sync/tests.py | 15 +- ietf/sync/tests_tasks.py | 62 ++++++ 8 files changed, 705 insertions(+), 7 deletions(-) create mode 100644 ietf/doc/tests_utils_rfc_json.py create mode 100644 ietf/doc/utils_rfc_json.py diff --git a/ietf/api/tests_views_rpc.py b/ietf/api/tests_views_rpc.py index 6d147c00b0..69e6d6b01b 100644 --- a/ietf/api/tests_views_rpc.py +++ b/ietf/api/tests_views_rpc.py @@ -565,3 +565,73 @@ def test_process_rpc_queue(self, mock_task_delay): ) self.assertEqual(response.status_code, 202) mock_task_delay.assert_called_once_with(queue_entries) + + @override_settings(APP_API_TOKENS={"ietf.api.views_rpc": ["valid-token"]}) + @mock.patch("ietf.api.views_rpc.update_rfc_json_task.delay") + def test_rfc_patch_triggers_json_update(self, mock_delay): + """PATCHing RFC metadata dispatches update_rfc_json_task for that RFC.""" + rfc = WgRfcFactory() + url = urlreverse( + "ietf.api.purple_api.rfc-detail", kwargs={"rfc_number": rfc.rfc_number} + ) + patch_data = {"title": "Updated Title"} + with self.captureOnCommitCallbacks(execute=True): + r = self.client.patch( + url, + data=patch_data, + format="json", + headers={"X-Api-Key": "valid-token"}, + ) + self.assertEqual(r.status_code, 200) + mock_delay.assert_called_once_with([rfc.rfc_number]) + + @override_settings(APP_API_TOKENS={"ietf.api.views_rpc": ["valid-token"]}) + @mock.patch("ietf.doc.tasks.signal_update_rfc_metadata_task.delay") + @mock.patch("ietf.api.views_rpc.update_rfc_json_task.delay") + def test_rfc_publish_triggers_related_json_update( + self, mock_json_delay, mock_signal_delay + ): + """Publishing an RFC that obsoletes/updates existing RFCs triggers JSON update for related RFCs only.""" + url = urlreverse("ietf.api.purple_api.notify_rfc_published") + area = GroupFactory(type_id="area") + rfc_group = GroupFactory(type_id="wg") + draft = WgDraftFactory(group__parent=area, stream_id="ietf") + obsoletes = RfcFactory.create_batch(2) + updates = RfcFactory.create_batch(1) + unused_rfc_number = ( + Document.objects.filter(rfc_number__isnull=False).aggregate( + unused_rfc_number=Max("rfc_number") + 1 + )["unused_rfc_number"] + or 20000 + ) + post_data = { + "published": "2025-06-01T00:00:00Z", + "draft_name": draft.name, + "draft_rev": draft.rev, + "rfc_number": unused_rfc_number, + "title": "New RFC", + "authors": [], + "group": rfc_group.acronym, + "stream": "ietf", + "abstract": "Abstract.", + "pages": 10, + "std_level": "ps", + "obsoletes": [o.rfc_number for o in obsoletes], + "updates": [u.rfc_number for u in updates], + "subseries": [], + } + with self.captureOnCommitCallbacks(execute=True): + r = self.client.post( + url, + data=post_data, + format="json", + headers={"X-Api-Key": "valid-token"}, + ) + self.assertEqual(r.status_code, 200) + + # JSON update fired only for related RFCs, not for the new RFC itself + expected_related = sorted( + {o.rfc_number for o in obsoletes} | {u.rfc_number for u in updates} + ) + mock_json_delay.assert_called_once_with(expected_related) + self.assertNotIn(unused_rfc_number, mock_json_delay.call_args[0][0]) diff --git a/ietf/api/views_rpc.py b/ietf/api/views_rpc.py index 83d0abefb1..0c9e98e2dc 100644 --- a/ietf/api/views_rpc.py +++ b/ietf/api/views_rpc.py @@ -5,7 +5,7 @@ from tempfile import TemporaryDirectory from django.conf import settings -from django.db import IntegrityError +from django.db import IntegrityError, transaction from drf_spectacular.utils import OpenApiParameter from rest_framework import mixins, parsers, serializers, viewsets, status from rest_framework.decorators import action @@ -48,7 +48,7 @@ ) from ietf.person.models import Email, Person from ietf.sync.rfcindex import mark_rfcindex_as_dirty -from ietf.sync.tasks import process_rpc_queue_task +from ietf.sync.tasks import process_rpc_queue_task, update_rfc_json_task class Conflict(APIException): @@ -297,6 +297,8 @@ def perform_update(self, serializer): desc="Metadata update from RFC Editor", ) super().perform_update(serializer) + rfc_number = serializer.instance.rfc_number + transaction.on_commit(lambda: update_rfc_json_task.delay([rfc_number])) @action(detail=False, serializer_class=OriginalStreamSerializer) def rfc_original_stream(self, request): @@ -457,6 +459,11 @@ def post(self, request): ) rfc_number_list = sorted(set(rfc_number_list)) signal_update_rfc_metadata_task.delay(rfc_number_list=rfc_number_list) + related_numbers = sorted( + {d.rfc_number for d in rfc.related_that_doc(("updates", "obs"))} + ) + if related_numbers: + transaction.on_commit(lambda: update_rfc_json_task.delay(related_numbers)) return Response(NotificationAckSerializer().data) diff --git a/ietf/doc/tests_utils_rfc_json.py b/ietf/doc/tests_utils_rfc_json.py new file mode 100644 index 0000000000..890bb95dd0 --- /dev/null +++ b/ietf/doc/tests_utils_rfc_json.py @@ -0,0 +1,333 @@ +# Copyright The IETF Trust 2026, All Rights Reserved + +import json + +from django.core.files.base import ContentFile +from django.core.files.storage import storages +from django.test.utils import override_settings + +from ietf.doc.factories import ( + PublishedRfcDocEventFactory, + RfcAuthorFactory, + RfcFactory, + WgRfcFactory, +) +from ietf.doc.models import RelatedDocument +from ietf.doc.utils_rfc_json import generate_rfc_json +from ietf.group.factories import GroupFactory +from ietf.name.models import StdLevelName +from ietf.utils.test_utils import TestCase + + +def _put_pub_levels(rfc_number, slug, path="input/"): + """Write a minimal publication-std-levels.json to the red bucket.""" + red_bucket = storages["red_bucket"] + red_bucket.save( + f"{path}publication-std-levels.json", + ContentFile( + json.dumps([{"number": rfc_number, "publication_std_level": slug}]) + ), + ) + + +def _put_errata(rfc_number, path="other/errata.json"): + """Write an errata.json with one entry for the given RFC.""" + red_bucket = storages["red_bucket"] + red_bucket.save( + path, + ContentFile( + json.dumps( + [{"doc-id": f"RFC{rfc_number}", "errata_status_code": "Reported"}] + ) + ), + ) + + +def _put_empty_errata(path="other/errata.json"): + red_bucket = storages["red_bucket"] + red_bucket.save(path, ContentFile(json.dumps([]))) + + +def _put_april_first(rfc_number, path="input/"): + red_bucket = storages["red_bucket"] + red_bucket.save( + f"{path}april-first-rfc-numbers.json", + ContentFile(json.dumps([rfc_number])), + ) + + +def _read_json(rfc_number): + from ietf.blobdb.models import Blob + + blob = Blob.objects.get(bucket="rfc", name=f"json/rfc{rfc_number}.json") + return json.loads(bytes(blob.content)) + + +@override_settings( + RFCINDEX_INPUT_PATH="input/", + ERRATA_JSON_BLOB_NAME="other/errata.json", + RFC_EDITOR_ERRATA_BASE_URL="https://www.rfc-editor.org/errata/", +) +class GenerateRfcJsonTests(TestCase): + def setUp(self): + super().setUp() + # Minimal red_bucket blobs required by all tests + red_bucket = storages["red_bucket"] + red_bucket.save( + "input/april-first-rfc-numbers.json", ContentFile(json.dumps([])) + ) + + def tearDown(self): + red_bucket = storages["red_bucket"] + for name in [ + "input/publication-std-levels.json", + "input/april-first-rfc-numbers.json", + "other/errata.json", + ]: + try: + red_bucket.delete(name) + except Exception: + pass + super().tearDown() + + def test_missing_rfc_logs_and_returns(self): + """Calling for a nonexistent RFC number logs and returns without raising.""" + # Should not raise; no blob should be written + generate_rfc_json(999999, pub_levels={}) + from ietf.blobdb.models import Blob + + self.assertFalse( + Blob.objects.filter(bucket="rfc", name="json/rfc999999.json").exists() + ) + + def test_all_fields(self): + """All 17 JSON fields are populated correctly from a fully-populated RFC.""" + area = GroupFactory(type_id="area") + wg = GroupFactory(type_id="wg", parent=area) + rfc = PublishedRfcDocEventFactory( + time="2021-05-01T00:00:00Z", + doc=WgRfcFactory( + group=wg, + stream_id="ietf", + std_level_id="ps", + pages=42, + abstract="Test abstract.", + keywords=["foo", "bar"], + ), + ).doc + author = RfcAuthorFactory(document=rfc, is_editor=False) + editor = RfcAuthorFactory(document=rfc, is_editor=True) + + obsoletes_rfc = RfcFactory() + updated_rfc = RfcFactory() + RelatedDocument.objects.create( + source=rfc, target=obsoletes_rfc, relationship_id="obs" + ) + RelatedDocument.objects.create( + source=rfc, target=updated_rfc, relationship_id="updates" + ) + obsoleted_by_rfc = RfcFactory() + updated_by_rfc = RfcFactory() + RelatedDocument.objects.create( + source=obsoleted_by_rfc, target=rfc, relationship_id="obs" + ) + RelatedDocument.objects.create( + source=updated_by_rfc, target=rfc, relationship_id="updates" + ) + + _put_pub_levels(rfc.rfc_number, "ps") + _put_empty_errata() + + generate_rfc_json(rfc.rfc_number) + data = _read_json(rfc.rfc_number) + + self.assertEqual(data["doc_id"], f"RFC{rfc.rfc_number}") + self.assertEqual(data["title"], rfc.title) + self.assertEqual(data["abstract"], "Test abstract.") + self.assertEqual(data["page_count"], "42") + self.assertEqual(data["pub_status"], "PROPOSED STANDARD") + self.assertEqual(data["status"], "PROPOSED STANDARD") + self.assertEqual(data["pub_date"], "May 2021") + self.assertEqual(data["keywords"], ["foo", "bar"]) + self.assertEqual(data["see_also"], []) + self.assertEqual(data["doi"], f"10.17487/RFC{rfc.rfc_number}") + self.assertIsNone(data["errata_url"]) + self.assertIsNone(data["draft"]) + + # authors — non-editor first (lower order), then editor + self.assertEqual( + data["authors"], + [author.titlepage_name, f"{editor.titlepage_name}, Ed."], + ) + + # relationships + self.assertIn(f"RFC{obsoletes_rfc.rfc_number}", data["obsoletes"]) + self.assertIn(f"RFC{updated_rfc.rfc_number}", data["updates"]) + self.assertIn(f"RFC{obsoleted_by_rfc.rfc_number}", data["obsoleted_by"]) + self.assertIn(f"RFC{updated_by_rfc.rfc_number}", data["updated_by"]) + + def test_pub_status_differs_from_status(self): + """pub_status reflects publication-std-levels.json; status reflects current std_level.""" + rfc = PublishedRfcDocEventFactory( + doc=WgRfcFactory(std_level_id="hist"), + ).doc + # Record was published as "ps" but is now "hist" + _put_pub_levels(rfc.rfc_number, "ps") + _put_empty_errata() + + generate_rfc_json(rfc.rfc_number) + data = _read_json(rfc.rfc_number) + + self.assertEqual(data["pub_status"], "PROPOSED STANDARD") + self.assertEqual(data["status"], "HISTORIC") + + def test_errata_url_set_when_errata_exist(self): + """errata_url is populated when errata.json has any entry for the RFC.""" + rfc = PublishedRfcDocEventFactory(doc=WgRfcFactory()).doc + _put_pub_levels(rfc.rfc_number, "ps") + _put_errata(rfc.rfc_number) + + generate_rfc_json(rfc.rfc_number) + data = _read_json(rfc.rfc_number) + + self.assertEqual( + data["errata_url"], + f"https://www.rfc-editor.org/errata/rfc{rfc.rfc_number}", + ) + + def test_errata_url_none_when_no_errata(self): + """errata_url is None when errata.json has no entries for the RFC.""" + rfc = PublishedRfcDocEventFactory(doc=WgRfcFactory()).doc + _put_pub_levels(rfc.rfc_number, "ps") + _put_empty_errata() + + generate_rfc_json(rfc.rfc_number) + data = _read_json(rfc.rfc_number) + + self.assertIsNone(data["errata_url"]) + + def test_errata_failure_yields_null_url(self): + """If reading errata.json fails, errata_url is null and no exception is raised.""" + rfc = PublishedRfcDocEventFactory(doc=WgRfcFactory()).doc + _put_pub_levels(rfc.rfc_number, "ps") + # Deliberately do not put errata blob — FileNotFoundError expected + + generate_rfc_json(rfc.rfc_number) # must not raise + data = _read_json(rfc.rfc_number) + self.assertIsNone(data["errata_url"]) + + def test_second_call_overwrites(self): + """Calling generate_rfc_json twice does not raise AlreadyExistsError.""" + rfc = PublishedRfcDocEventFactory(doc=WgRfcFactory()).doc + _put_pub_levels(rfc.rfc_number, "ps") + _put_empty_errata() + + generate_rfc_json(rfc.rfc_number) + generate_rfc_json(rfc.rfc_number) # must not raise + + def test_april_first_date_format(self): + """April Fools RFCs get '1 April YYYY' date format.""" + rfc = PublishedRfcDocEventFactory( + time="2020-04-01T12:00:00Z", + doc=WgRfcFactory(), + ).doc + red_bucket = storages["red_bucket"] + red_bucket.delete("input/april-first-rfc-numbers.json") + _put_april_first(rfc.rfc_number) + _put_pub_levels(rfc.rfc_number, "inf") + _put_empty_errata() + + generate_rfc_json(rfc.rfc_number) + data = _read_json(rfc.rfc_number) + + self.assertEqual(data["pub_date"], "1 April 2020") + + def test_non_april_first_april_date(self): + """An April publication that is NOT in the April Fools list gets 'April YYYY'.""" + rfc = PublishedRfcDocEventFactory( + time="2020-04-15T12:00:00Z", + doc=WgRfcFactory(), + ).doc + _put_pub_levels(rfc.rfc_number, "inf") + _put_empty_errata() + + generate_rfc_json(rfc.rfc_number) + data = _read_json(rfc.rfc_number) + + self.assertEqual(data["pub_date"], "April 2020") + + def test_source_ietf_wg(self): + """IETF-stream WG RFC: source is 'acronym (area)'.""" + area = GroupFactory(type_id="area") + wg = GroupFactory(type_id="wg", parent=area) + rfc = PublishedRfcDocEventFactory( + doc=WgRfcFactory(group=wg, stream_id="ietf"), + ).doc + _put_pub_levels(rfc.rfc_number, "ps") + _put_empty_errata() + + generate_rfc_json(rfc.rfc_number) + data = _read_json(rfc.rfc_number) + + self.assertEqual(data["source"], f"{wg.acronym} ({area.acronym})") + + def test_source_ietf_no_wg(self): + """IETF-stream individual RFC (group acronym 'none'): source is 'IETF - NON WORKING GROUP'.""" + rfc = PublishedRfcDocEventFactory( + doc=RfcFactory( + group=GroupFactory(acronym="none"), + stream_id="ietf", + ), + ).doc + _put_pub_levels(rfc.rfc_number, "inf") + _put_empty_errata() + + generate_rfc_json(rfc.rfc_number) + data = _read_json(rfc.rfc_number) + + self.assertEqual(data["source"], "IETF - NON WORKING GROUP") + + def test_source_iab(self): + """IAB-stream RFC: source is 'IAB'.""" + rfc = PublishedRfcDocEventFactory( + doc=RfcFactory(stream_id="iab"), + ).doc + _put_pub_levels(rfc.rfc_number, "inf") + _put_empty_errata() + + generate_rfc_json(rfc.rfc_number) + data = _read_json(rfc.rfc_number) + + self.assertEqual(data["source"], "IAB") + + def test_source_ise(self): + """ISE-stream RFC: source is 'INDEPENDENT'.""" + rfc = PublishedRfcDocEventFactory( + doc=RfcFactory(stream_id="ise"), + ).doc + _put_pub_levels(rfc.rfc_number, "inf") + _put_empty_errata() + + generate_rfc_json(rfc.rfc_number) + data = _read_json(rfc.rfc_number) + + self.assertEqual(data["source"], "INDEPENDENT") + + def test_pub_levels_passed_in(self): + """When pub_levels is passed in, get_publication_std_levels() is not called.""" + import mock + + rfc = PublishedRfcDocEventFactory(doc=WgRfcFactory()).doc + _put_empty_errata() + + ps_level = StdLevelName.objects.get(slug="ps") + pub_levels = {rfc.rfc_number: ps_level} + + with mock.patch( + "ietf.doc.utils_rfc_json.get_publication_std_levels" + ) as mock_get: + generate_rfc_json(rfc.rfc_number, pub_levels=pub_levels) + mock_get.assert_not_called() + + data = _read_json(rfc.rfc_number) + self.assertEqual(data["pub_status"], "PROPOSED STANDARD") diff --git a/ietf/doc/utils_rfc_json.py b/ietf/doc/utils_rfc_json.py new file mode 100644 index 0000000000..baca2c3357 --- /dev/null +++ b/ietf/doc/utils_rfc_json.py @@ -0,0 +1,197 @@ +# Copyright The IETF Trust 2026, All Rights Reserved + +import json +from pathlib import Path + +from django.conf import settings +from django.utils import timezone + +from ietf.doc.models import Document, RelatedDocument +from ietf.doc.storage_utils import exists_in_storage, store_bytes +from ietf.sync.errata import errata_map_from_json, get_errata_data +from ietf.sync.rfcindex import get_april1_rfc_numbers, get_publication_std_levels +from ietf.utils.log import log + + +_FORMAT_CHECKS = [ + ("xml", "XML"), + ("txt", "TEXT"), + ("html", "HTML"), + ("pdf", "PDF"), +] + + +def generate_rfc_json(rfc_number: int, *, pub_levels=None) -> None: + """Generate and store the JSON metadata file for a published RFC. + + Reads RFC metadata from the DB and errata data from the red bucket, combines + them, and writes json/rfc{N}.json to the "rfc" blob bucket (overwriting any + existing file). + + pub_levels, if provided, should be the defaultdict returned by + get_publication_std_levels(). Pass it when generating JSON for multiple RFCs + to avoid a redundant blob read per call. + """ + try: + rfc = ( + Document.objects.select_related("std_level", "stream", "group__parent") + .prefetch_related("rfcauthor_set") + .get(type_id="rfc", rfc_number=rfc_number) + ) + except Document.DoesNotExist: + log(f"generate_rfc_json: no RFC found for rfc_number={rfc_number}") + return + + doc_id = f"RFC{rfc_number}" + + # draft name + draft_doc = rfc.came_from_draft() + draft = draft_doc.name if draft_doc else None + + # authors: ordered list of display strings + authors = [] + for author in rfc.rfcauthor_set.order_by("order"): + name = author.titlepage_name + if author.is_editor: + name = f"{name}, Ed." + authors.append(name) + + # format: check which file blobs are present + formats = [ + label + for ext, label in _FORMAT_CHECKS + if exists_in_storage(kind="rfc", name=f"{ext}/rfc{rfc_number}.{ext}") + ] + + # page_count + page_count = str(rfc.pages) if rfc.pages is not None else "" + + # pub_status from publication-std-levels.json in the red bucket + if pub_levels is None: + pub_levels = get_publication_std_levels() + pub_status = pub_levels[rfc_number].name.upper() + + # status: current std_level + status = rfc.std_level.name.upper() if rfc.std_level else "" + + # source: adapted from errata system's display_source() logic + stream_slug = rfc.stream.slug if rfc.stream else "" + group_acronym = rfc.group.acronym if rfc.group else "none" + area_acronym = rfc.group.parent.acronym if rfc.group and rfc.group.parent else "" + + if stream_slug == "ise": + source = "INDEPENDENT" + elif stream_slug == "iab": + source = "IAB" + elif stream_slug == "ietf" and ( + group_acronym in ("none", "gen") or not area_acronym + ): + source = "IETF - NON WORKING GROUP" + elif group_acronym not in ("none", ""): + source = group_acronym + if stream_slug == "ietf" and area_acronym: + source += f" ({area_acronym})" + elif stream_slug: + source += f" ({stream_slug})" + elif stream_slug: + source = "Legacy" if stream_slug == "legacy" else stream_slug.upper() + else: + source = "" + + # pub_date: month/year of publication, with April 1st special-casing + pub_event = rfc.latest_event(type="published_rfc") + pub_date = None + if pub_event: + dt = pub_event.time + try: + april_first_numbers = get_april1_rfc_numbers() + except Exception: + april_first_numbers = [] + if dt.month == 4 and rfc_number in april_first_numbers: + pub_date = dt.strftime("1 %B %Y") + else: + pub_date = dt.strftime("%B %Y") + + # relationship lists — sorted by RFC number + def _rfc_list(qs, attr): + numbers = [ + getattr(rd, attr).rfc_number + for rd in qs + if getattr(rd, attr).rfc_number is not None + ] + return [f"RFC{n}" for n in sorted(numbers)] + + obsoletes = _rfc_list( + RelatedDocument.objects.filter( + source=rfc, relationship_id="obs" + ).select_related("target"), + "target", + ) + obsoleted_by = _rfc_list( + RelatedDocument.objects.filter( + target=rfc, relationship_id="obs" + ).select_related("source"), + "source", + ) + updates = _rfc_list( + RelatedDocument.objects.filter( + source=rfc, relationship_id="updates" + ).select_related("target"), + "target", + ) + updated_by = _rfc_list( + RelatedDocument.objects.filter( + target=rfc, relationship_id="updates" + ).select_related("source"), + "source", + ) + + # errata_url: non-None if any errata entry exists for this RFC (any status) + try: + errata_data = get_errata_data() + errata_map = errata_map_from_json(errata_data) + errata_url = ( + settings.RFC_EDITOR_ERRATA_BASE_URL + f"rfc{rfc_number}" + if rfc_number in errata_map + else None + ) + except Exception: + log(f"generate_rfc_json: could not load errata data for RFC {rfc_number}") + errata_url = None + + data = { + "draft": draft, + "doc_id": doc_id, + "title": rfc.title, + "authors": authors, + "format": formats, + "page_count": page_count, + "pub_status": pub_status, + "status": status, + "source": source, + "abstract": rfc.abstract, + "pub_date": pub_date, + "keywords": rfc.keywords, + "obsoletes": obsoletes, + "obsoleted_by": obsoleted_by, + "updates": updates, + "updated_by": updated_by, + "see_also": [], + "doi": f"10.17487/{doc_id}", + "errata_url": errata_url, + } + + content = json.dumps(data, indent=2).encode("utf-8") + store_bytes( + kind="rfc", + name=f"json/rfc{rfc_number}.json", + content=content, + allow_overwrite=True, + doc_name=f"rfc{rfc_number}", + doc_rev=None, + mtime=timezone.now(), + ) + fs_path = Path(settings.RFC_PATH) / f"rfc{rfc_number}.json" + if settings.SERVER_MODE != "production" and not fs_path.parent.exists(): + fs_path.parent.mkdir() + fs_path.write_bytes(content) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 113d987291..bc1a1cbd28 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -20,6 +20,7 @@ type ErrataJsonEntry = dict[str, str] + def get_errata_last_updated() -> datetime.datetime: """Get timestamp of the last errata.json update @@ -123,10 +124,12 @@ def update_errata_tags(errata_data: list[ErrataJsonEntry]): desc=summary, ) + return {rfc.rfc_number for rfc in changes} + -def update_errata_from_rfceditor(): +def update_errata_from_rfceditor() -> set[int]: errata_data = get_errata_data() - update_errata_tags(errata_data) + return update_errata_tags(errata_data) ## DirtyBits management for the errata tags diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 3808fb1db5..a582d85bf8 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -302,9 +302,24 @@ def update_errata_from_rfceditor_task(): # new_processed_time is the *start* of processing so that any changes after # this point will trigger another refresh new_processed_time = timezone.now() - update_errata_from_rfceditor() + changed_numbers = update_errata_from_rfceditor() mark_errata_as_processed(new_processed_time) mark_rfcindex_as_dirty() # ensure any changes are reflected in the indexes + if changed_numbers: + update_rfc_json_task.delay(list(changed_numbers)) + + +@shared_task +def update_rfc_json_task(rfc_numbers: list[int]) -> None: + from ietf.doc.utils_rfc_json import generate_rfc_json + from ietf.sync.rfcindex import get_publication_std_levels + + pub_levels = get_publication_std_levels() + for rfc_number in rfc_numbers: + try: + generate_rfc_json(rfc_number, pub_levels=pub_levels) + except Exception as e: + log.log(f"update_rfc_json_task: failed for RFC {rfc_number}: {e}") @shared_task diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index 207c78cf6a..b41c00e7a6 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -922,11 +922,14 @@ def test_errata_map_from_json(self): @mock.patch("ietf.sync.errata.get_errata_data") def test_update_errata_from_rfceditor(self, mock_get_data, mock_update): fake_data = object() + fake_changed = {1234, 5678} mock_get_data.return_value = fake_data - update_errata_from_rfceditor() + mock_update.return_value = fake_changed + result = update_errata_from_rfceditor() self.assertTrue(mock_get_data.called) self.assertTrue(mock_update.called) self.assertEqual(mock_update.call_args, mock.call(fake_data)) + self.assertEqual(result, fake_changed) def test_update_errata_tags(self): tag_has_errata = DocTagName.objects.get(slug="errata") @@ -964,7 +967,7 @@ def test_update_errata_tags(self): {"doc-id": rfcs[8].name, "errata_status_code": "Rejected"}, # rfcs[9] had none and it should stay that way (no entry at all) ] - update_errata_tags(errata_data) + changed = update_errata_tags(errata_data) self.assertCountEqual(rfcs[0].tags.all(), [tag_has_errata]) self.assertIsNone(rfcs[0].docevent_set.first()) # no change @@ -1018,6 +1021,14 @@ def test_update_errata_tags(self): self.assertCountEqual(rfcs[9].tags.all(), []) self.assertIsNone(rfcs[9].docevent_set.first()) # no change + # return value: only RFCs whose tags actually changed + # rfcs[0], rfcs[1], rfcs[8], rfcs[9] had no tag changes + for unchanged_rfc in (rfcs[0], rfcs[1], rfcs[8], rfcs[9]): + self.assertNotIn(unchanged_rfc.rfc_number, changed) + # rfcs[2..7] had tag changes + for changed_rfc in rfcs[2:8]: + self.assertIn(changed_rfc.rfc_number, changed) + @override_settings(ERRATA_JSON_BLOB_NAME="myblob.json") @mock.patch("ietf.sync.errata.get_errata_last_updated") def test_update_errata_dirty_time(self, mock_last_updated): diff --git a/ietf/sync/tests_tasks.py b/ietf/sync/tests_tasks.py index 6657dd617a..edfd080079 100644 --- a/ietf/sync/tests_tasks.py +++ b/ietf/sync/tests_tasks.py @@ -1,5 +1,6 @@ # Copyright The IETF Trust 2026, All Rights Reserved +import mock from django.test.utils import override_settings from ietf.doc.factories import WgDraftFactory @@ -474,3 +475,64 @@ def test_docs_in_queue_retain_rfceditor_state(self): draft = Document.objects.get(pk=draft.pk) self.assertIsNotNone(draft.get_state("draft-rfceditor")) + + +class UpdateErrataFromRfcEditorTaskTests(TestCase): + @mock.patch("ietf.sync.tasks.update_rfc_json_task.delay") + @mock.patch("ietf.sync.tasks.update_errata_from_rfceditor") + @mock.patch("ietf.sync.tasks.mark_rfcindex_as_dirty") + @mock.patch("ietf.sync.tasks.mark_errata_as_processed") + @mock.patch("ietf.sync.tasks.errata_are_dirty") + def test_no_update_when_not_dirty( + self, + mock_dirty, + mock_mark_processed, + mock_mark_index, + mock_update, + mock_json_delay, + ): + """When errata are not dirty nothing runs.""" + mock_dirty.return_value = False + tasks.update_errata_from_rfceditor_task() + mock_update.assert_not_called() + mock_json_delay.assert_not_called() + + @mock.patch("ietf.sync.tasks.update_rfc_json_task.delay") + @mock.patch("ietf.sync.tasks.update_errata_from_rfceditor") + @mock.patch("ietf.sync.tasks.mark_rfcindex_as_dirty") + @mock.patch("ietf.sync.tasks.mark_errata_as_processed") + @mock.patch("ietf.sync.tasks.errata_are_dirty") + def test_json_task_called_for_changed_rfcs( + self, + mock_dirty, + mock_mark_processed, + mock_mark_index, + mock_update, + mock_json_delay, + ): + """update_rfc_json_task is dispatched with the changed RFC numbers.""" + mock_dirty.return_value = True + mock_update.return_value = {3261, 9000} + tasks.update_errata_from_rfceditor_task() + mock_json_delay.assert_called_once() + called_numbers = mock_json_delay.call_args[0][0] + self.assertCountEqual(called_numbers, [3261, 9000]) + + @mock.patch("ietf.sync.tasks.update_rfc_json_task.delay") + @mock.patch("ietf.sync.tasks.update_errata_from_rfceditor") + @mock.patch("ietf.sync.tasks.mark_rfcindex_as_dirty") + @mock.patch("ietf.sync.tasks.mark_errata_as_processed") + @mock.patch("ietf.sync.tasks.errata_are_dirty") + def test_json_task_not_called_when_no_changes( + self, + mock_dirty, + mock_mark_processed, + mock_mark_index, + mock_update, + mock_json_delay, + ): + """update_rfc_json_task is not dispatched when no errata tags changed.""" + mock_dirty.return_value = True + mock_update.return_value = set() + tasks.update_errata_from_rfceditor_task() + mock_json_delay.assert_not_called() From 7ea29247d73c2154a0d44d5d5355b4cb38e7eff8 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Mon, 1 Jun 2026 16:31:53 -0500 Subject: [PATCH 2/2] fix: resilience around fetch of publication status --- ietf/doc/tests_utils_rfc_json.py | 53 ++++++++++++++++++++++++++++++-- ietf/doc/utils_rfc_json.py | 30 ++++++++++++++---- ietf/sync/tasks.py | 6 +++- 3 files changed, 80 insertions(+), 9 deletions(-) diff --git a/ietf/doc/tests_utils_rfc_json.py b/ietf/doc/tests_utils_rfc_json.py index 890bb95dd0..34d7fca8eb 100644 --- a/ietf/doc/tests_utils_rfc_json.py +++ b/ietf/doc/tests_utils_rfc_json.py @@ -1,10 +1,13 @@ # Copyright The IETF Trust 2026, All Rights Reserved +import datetime import json +from unittest import mock from django.core.files.base import ContentFile from django.core.files.storage import storages from django.test.utils import override_settings +from django.utils import timezone from ietf.doc.factories import ( PublishedRfcDocEventFactory, @@ -315,8 +318,6 @@ def test_source_ise(self): def test_pub_levels_passed_in(self): """When pub_levels is passed in, get_publication_std_levels() is not called.""" - import mock - rfc = PublishedRfcDocEventFactory(doc=WgRfcFactory()).doc _put_empty_errata() @@ -331,3 +332,51 @@ def test_pub_levels_passed_in(self): data = _read_json(rfc.rfc_number) self.assertEqual(data["pub_status"], "PROPOSED STANDARD") + + def test_pub_levels_fetch_failure_returns_without_writing(self): + """If get_publication_std_levels() raises, function logs and returns without writing a blob.""" + rfc = PublishedRfcDocEventFactory(doc=WgRfcFactory()).doc + _put_empty_errata() + + with mock.patch( + "ietf.doc.utils_rfc_json.get_publication_std_levels", + side_effect=FileNotFoundError("not found"), + ): + generate_rfc_json(rfc.rfc_number) # must not raise + + from ietf.blobdb.models import Blob + + self.assertFalse( + Blob.objects.filter( + bucket="rfc", name=f"json/rfc{rfc.rfc_number}.json" + ).exists() + ) + + def test_pub_status_fallback_to_status_for_recent_rfc(self): + """RFC missing from pub_levels but published within 2 days: pub_status falls back to current std_level.""" + now = timezone.now() + rfc = PublishedRfcDocEventFactory( + time=now - datetime.timedelta(hours=1), + doc=WgRfcFactory(std_level_id="ps"), + ).doc + _put_empty_errata() + + with mock.patch("ietf.doc.utils_rfc_json.timezone") as mock_tz: + mock_tz.now.return_value = now + generate_rfc_json(rfc.rfc_number, pub_levels={}) + + data = _read_json(rfc.rfc_number) + self.assertEqual(data["pub_status"], "PROPOSED STANDARD") + + def test_pub_status_unknown_for_old_rfc_missing_from_levels(self): + """RFC missing from pub_levels and published more than 2 days ago: pub_status is UNKNOWN.""" + rfc = PublishedRfcDocEventFactory( + time="2020-01-01T00:00:00Z", + doc=WgRfcFactory(std_level_id="ps"), + ).doc + _put_empty_errata() + + generate_rfc_json(rfc.rfc_number, pub_levels={}) + + data = _read_json(rfc.rfc_number) + self.assertEqual(data["pub_status"], "UNKNOWN") diff --git a/ietf/doc/utils_rfc_json.py b/ietf/doc/utils_rfc_json.py index baca2c3357..1422d18c7e 100644 --- a/ietf/doc/utils_rfc_json.py +++ b/ietf/doc/utils_rfc_json.py @@ -1,5 +1,6 @@ # Copyright The IETF Trust 2026, All Rights Reserved +import datetime import json from pathlib import Path @@ -7,6 +8,7 @@ from django.utils import timezone from ietf.doc.models import Document, RelatedDocument +from ietf.name.models import StdLevelName from ietf.doc.storage_utils import exists_in_storage, store_bytes from ietf.sync.errata import errata_map_from_json, get_errata_data from ietf.sync.rfcindex import get_april1_rfc_numbers, get_publication_std_levels @@ -42,6 +44,13 @@ def generate_rfc_json(rfc_number: int, *, pub_levels=None) -> None: log(f"generate_rfc_json: no RFC found for rfc_number={rfc_number}") return + if pub_levels is None: + try: + pub_levels = get_publication_std_levels() + except Exception as e: + log(f"generate_rfc_json: failed to get publication std levels: {e}") + return + doc_id = f"RFC{rfc_number}" # draft name @@ -66,14 +75,24 @@ def generate_rfc_json(rfc_number: int, *, pub_levels=None) -> None: # page_count page_count = str(rfc.pages) if rfc.pages is not None else "" - # pub_status from publication-std-levels.json in the red bucket - if pub_levels is None: - pub_levels = get_publication_std_levels() - pub_status = pub_levels[rfc_number].name.upper() - # status: current std_level status = rfc.std_level.name.upper() if rfc.std_level else "" + # pub_status from publication-std-levels.json in the red bucket + # but guard against recent publication not having updated the bucket yet + pub_event = rfc.latest_event(type="published_rfc") + if rfc_number in pub_levels: + pub_status = pub_levels[rfc_number].name.upper() + else: + if ( + pub_event is not None + and timezone.now() - pub_event.time < datetime.timedelta(days=2) + ): + pub_status = status + else: + log(f"Assuming an unknown publication status for rfc{rfc_number}") + pub_status = StdLevelName.objects.get(slug="unkn").name.upper() + # source: adapted from errata system's display_source() logic stream_slug = rfc.stream.slug if rfc.stream else "" group_acronym = rfc.group.acronym if rfc.group else "none" @@ -99,7 +118,6 @@ def generate_rfc_json(rfc_number: int, *, pub_levels=None) -> None: source = "" # pub_date: month/year of publication, with April 1st special-casing - pub_event = rfc.latest_event(type="published_rfc") pub_date = None if pub_event: dt = pub_event.time diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index a582d85bf8..4ccd5db4bb 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -314,7 +314,11 @@ def update_rfc_json_task(rfc_numbers: list[int]) -> None: from ietf.doc.utils_rfc_json import generate_rfc_json from ietf.sync.rfcindex import get_publication_std_levels - pub_levels = get_publication_std_levels() + try: + pub_levels = get_publication_std_levels() + except Exception as e: + log.log(f"update_rfc_json_task: failed to get publication std levels: {e}") + return for rfc_number in rfc_numbers: try: generate_rfc_json(rfc_number, pub_levels=pub_levels)