From 98d6bb51481fbba98aefafc4fa3c116e9fc9f57c Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 12:40:03 -0300 Subject: [PATCH 01/12] feat: utils to sync errata tags from errata.json --- ietf/sync/errata.py | 108 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 ietf/sync/errata.py diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py new file mode 100644 index 0000000000..ca826687ec --- /dev/null +++ b/ietf/sync/errata.py @@ -0,0 +1,108 @@ +# Copyright The IETF Trust 2026, All Rights Reserved +import datetime +import json +from collections import defaultdict +from typing import DefaultDict, Literal + +from django.core.files.storage import storages + +from ietf.doc.models import Document, DocEvent +from ietf.name.models import DocTagName +from ietf.person.models import Person + +ERRATA_BLOB_NAME = "other/errata.json" # name of errata.json in the red bucket + +def get_errata_last_updated() -> datetime.datetime: + """Get timestamp of the last errata.json update + + May raise FileNotFoundError or other storage/S3 exceptions. Be prepared. 
+ """ + red_bucket = storages["red_bucket"] + return red_bucket.get_modified_time(ERRATA_BLOB_NAME) + + +def get_errata_data(): + red_bucket = storages["red_bucket"] + with red_bucket.open(ERRATA_BLOB_NAME, "r") as f: + errata_data = json.load(f) + return errata_data + + +def errata_map_from_json(errata_data): + """Create a dict mapping RFC number to a list of applicable errata records""" + errata = defaultdict(list) + for item in errata_data: + doc_id = item["doc-id"] + if doc_id.upper().startswith("RFC"): + rfc_number = int(doc_id[3:]) + errata[rfc_number].append(item) + return dict(errata) + + +def update_errata_tags(errata_data): + tag_has_errata = DocTagName.objects.get(slug="errata") + tag_has_verified_errata = DocTagName.objects.get(slug="verified-errata") + system = Person.objects.get(name="(System)") + + errata_map = errata_map_from_json(errata_data) + nums_with_errata = [ + num + for num, errata in errata_map.items() + if any(er["errata_status_code"] != "Rejected" for er in errata) + ] + nums_with_verified_errata = [ + num + for num, errata in errata_map.items() + if any(er["errata_status_code"] == "Verified" for er in errata) + ] + + rfcs_gaining_errata_tag = Document.objects.filter( + type_id="rfc", rfc_number__in=nums_with_errata + ).exclude(tags=tag_has_errata) + + rfcs_gaining_verified_errata_tag = Document.objects.filter( + type_id="rfc", rfc_number__in=nums_with_verified_errata + ).exclude(tags=tag_has_verified_errata) + + rfcs_losing_errata_tag = Document.objects.filter( + type_id="rfc", tags=tag_has_errata + ).exclude(rfc_number__in=nums_with_errata) + + rfcs_losing_verified_errata_tag = Document.objects.filter( + type_id="rfc", tags=tag_has_verified_errata + ).exclude(rfc_number__in=nums_with_verified_errata) + + # map rfc_number to add/remove lists + changes: DefaultDict[Document, dict[str, list[DocTagName]]] = defaultdict( + lambda: {"add": [], "remove": []} + ) + for rfc in rfcs_gaining_errata_tag: + changes[rfc]["add"].append(tag_has_errata) 
+ for rfc in rfcs_gaining_verified_errata_tag: + changes[rfc]["add"].append(tag_has_verified_errata) + for rfc in rfcs_losing_errata_tag: + changes[rfc]["remove"].append(tag_has_errata) + for rfc in rfcs_losing_verified_errata_tag: + changes[rfc]["remove"].append(tag_has_verified_errata) + + for rfc, changeset in changes.items(): + change_descs = [] + for tag in changeset["add"]: + rfc.tags.add(tag) + change_descs.append(f"added {tag.slug} tag") + for tag in changeset["remove"]: + rfc.tags.remove(tag) + change_descs.append(f"removed {tag.slug} tag") + summary = "Update from RFC Editor: " + ", ".join(change_descs) + if all( + er["errata_status_code"] == "Rejected" + for er in errata_map[rfc.rfc_number] + ): + summary += " (all errata rejected)" + DocEvent.objects.create( + doc=rfc, + rev=rfc.rev, # expect no rev + by=system, + type="sync_from_rfc_editor", + desc=summary + ) From c01380714f0c7d2b0e44dc5a5c9a8f11d3c73cf6 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 13:56:13 -0300 Subject: [PATCH 02/12] feat: DirtyBits for errata processing --- ietf/sync/errata.py | 57 +++++++++++++++++++ .../migrations/0004_alter_dirtybits_slug.py | 21 +++++++ ietf/utils/models.py | 1 + 3 files changed, 79 insertions(+) create mode 100644 ietf/utils/migrations/0004_alter_dirtybits_slug.py diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index ca826687ec..1e8e122eae 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -5,10 +5,13 @@ from typing import DefaultDict, Literal from django.core.files.storage import storages +from django.db.models import Q from ietf.doc.models import Document, DocEvent from ietf.name.models import DocTagName from ietf.person.models import Person +from ietf.utils.log import log +from ietf.utils.models import DirtyBits ERRATA_BLOB_NAME = "other/errata.json" # name of errata.json in the red bucket @@ -106,3 +109,57 @@ def update_errata_tags(errata_data): type="sync_from_rfc_editor", desc=summary ) + + +## DirtyBits 
management for the errata tags + +ERRATA_SLUG = DirtyBits.Slugs.ERRATA + + +def update_errata_dirty_time() -> DirtyBits | None: + try: + last_update = get_errata_last_updated() + except Exception as err: + log(f"Error in get_errata_last_updated: {err}") + return None + else: + dirty_work, created = DirtyBits.objects.update_or_create( + slug=ERRATA_SLUG, defaults={"dirty_time": last_update} + ) + if created: + log(f"Created DirtyBits(slug='{ERRATA_SLUG}')") + return dirty_work + +def mark_errata_as_processed(when: datetime.datetime): + n_updated = DirtyBits.objects.filter( + Q(processed_time__isnull=True) | Q(processed_time__lt=when), + slug=ERRATA_SLUG, + ).update(processed_time=when) + if n_updated > 0: + log(f"processed_time is now {when.isoformat()}") + else: + log("processed_time not updated, no matching record found") + + +def errata_are_dirty(): + """Do the errata tags need to be updated?""" + dirty_work = update_errata_dirty_time() # creates DirtyBits if needed + if dirty_work is None: + # A None indicates we could not check the timestamp of errata.json. In that + # case, we are not likely to be able to read the blob either, so don't try + # to process it. An error was already logged. 
+ return False + display_processed_time = ( + dirty_work.processed_time.isoformat() + if dirty_work.processed_time is not None + else "never" + ) + log( + f"DirtyBits(slug='{ERRATA_SLUG}'): " + f"dirty_time={dirty_work.dirty_time.isoformat()} " + f"processed_time={display_processed_time}" + ) + return ( + dirty_work.processed_time is None + or dirty_work.dirty_time >= dirty_work.processed_time + ) diff --git a/ietf/utils/migrations/0004_alter_dirtybits_slug.py b/ietf/utils/migrations/0004_alter_dirtybits_slug.py new file mode 100644 index 0000000000..e17ea6cadd --- /dev/null +++ b/ietf/utils/migrations/0004_alter_dirtybits_slug.py @@ -0,0 +1,21 @@ +# Copyright The IETF Trust 2026, All Rights Reserved + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("utils", "0003_dirtybits"), + ] + + operations = [ + migrations.AlterField( + model_name="dirtybits", + name="slug", + field=models.CharField( + choices=[("rfcindex", "RFC Index"), ("errata", "Errata Tags")], + max_length=40, + unique=True, + ), + ), + ] diff --git a/ietf/utils/models.py b/ietf/utils/models.py index 13afbdfe20..64f7f253f2 100644 --- a/ietf/utils/models.py +++ b/ietf/utils/models.py @@ -15,6 +15,7 @@ class DirtyBits(models.Model): class Slugs(models.TextChoices): RFCINDEX = "rfcindex", "RFC Index" + ERRATA = "errata", "Errata Tags" # next line can become `...choices=Slugs)` when we get to Django 5.x slug = models.CharField(max_length=40, blank=False, choices=Slugs.choices, unique=True) From 070c0595c58788b159c113004d749da962cf8449 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 14:03:21 -0300 Subject: [PATCH 03/12] feat: errata update task --- ietf/sync/tasks.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 2805f431bf..357075620f 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -17,6 +17,8 @@ from ietf.doc.tasks import 
rebuild_reference_relations_task from ietf.sync import iana from ietf.sync import rfceditor +from ietf.sync.errata import errata_are_dirty, mark_errata_as_processed, get_errata_data, \ + update_errata_tags from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue from ietf.sync.rfcindex import ( create_bcp_txt_index, @@ -288,6 +290,17 @@ def load_rfcs_into_blobdb_task(start: int, end: int): load_rfcs_into_blobdb(list(range(start, end + 1))) +@shared_task +def update_errata_from_rfceditor_task(): + if errata_are_dirty(): + # new_processed_time is the *start* of processing so that any changes after + # this point will trigger another refresh + new_processed_time = timezone.now() + errata_data = get_errata_data() + update_errata_tags(errata_data) + mark_errata_as_processed(new_processed_time) + + @shared_task def refresh_rfc_index_task(): if rfcindex_is_dirty(): From 9f1e5ceb1d90439653920c7080e6e1fb924e762f Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 14:20:10 -0300 Subject: [PATCH 04/12] feat: rebuild rfc indexes after errata update --- ietf/sync/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 357075620f..ac0fe173ca 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -26,7 +26,7 @@ create_rfc_txt_index, create_rfc_xml_index, create_std_txt_index, - rfcindex_is_dirty, mark_rfcindex_as_processed, + rfcindex_is_dirty, mark_rfcindex_as_processed, mark_rfcindex_as_dirty, ) from ietf.sync.utils import build_from_file_content, load_rfcs_into_blobdb, rsync_helper from ietf.utils import log @@ -299,6 +299,7 @@ def update_errata_from_rfceditor_task(): errata_data = get_errata_data() update_errata_tags(errata_data) mark_errata_as_processed(new_processed_time) + mark_rfcindex_as_dirty() # ensure any changes are reflected in the indexes @shared_task From 4dd9f2a82569b85b6d2e74e418a43b7382edb8af Mon Sep 17 00:00:00 2001 From: Jennifer 
Richards Date: Tue, 14 Apr 2026 14:32:32 -0300 Subject: [PATCH 05/12] chore: setting for ERRATA_JSON_BLOB_NAME --- ietf/sync/errata.py | 27 +++++++++++++++++---------- k8s/settings_local.py | 5 +++++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 1e8e122eae..0d154bf86e 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -4,6 +4,7 @@ from collections import defaultdict from typing import DefaultDict, Literal +from django.conf import settings from django.core.files.storage import storages from django.db.models import Q @@ -13,20 +14,26 @@ from ietf.utils.log import log from ietf.utils.models import DirtyBits -ERRATA_BLOB_NAME = "other/errata.json" # name of errata.json in the red bucket + +DEFAULT_ERRATA_JSON_BLOB_NAME = "other/errata.json" + def get_errata_last_updated() -> datetime.datetime: """Get timestamp of the last errata.json update - + May raise FileNotFoundError or other storage/S3 exceptions. Be prepared. 
""" red_bucket = storages["red_bucket"] - return red_bucket.get_modified_time(ERRATA_BLOB_NAME) + return red_bucket.get_modified_time( + getattr(settings, "ERRATA_JSON_BLOB_NAME", DEFAULT_ERRATA_JSON_BLOB_NAME) + ) def get_errata_data(): red_bucket = storages["red_bucket"] - with red_bucket.open(ERRATA_BLOB_NAME, "r") as f: + with red_bucket.open( + getattr(settings, "ERRATA_JSON_BLOB_NAME", DEFAULT_ERRATA_JSON_BLOB_NAME), "r" + ) as f: errata_data = json.load(f) return errata_data @@ -78,10 +85,10 @@ def update_errata_tags(errata_data): # map rfc_number to add/remove lists changes: DefaultDict[Document, dict[str, list[DocTagName]]] = defaultdict( lambda: {"add": [], "remove": []} - ) + ) for rfc in rfcs_gaining_errata_tag: changes[rfc]["add"].append(tag_has_errata) - for rfc in rfcs_gaining_verified_errata_tag: + for rfc in rfcs_gaining_verified_errata_tag: changes[rfc]["add"].append(tag_has_verified_errata) for rfc in rfcs_losing_errata_tag: changes[rfc]["remove"].append(tag_has_errata) @@ -98,8 +105,7 @@ def update_errata_tags(errata_data): change_descs.append(f"removed {tag.slug} tag") summary = "Update from RFC Editor: " + ", ".join(change_descs) if all( - er["errata_status_code"] == "Rejected" - for er in errata_map[rfc.rfc_number] + er["errata_status_code"] == "Rejected" for er in errata_map[rfc.rfc_number] ): summary += " (all errata rejected)" DocEvent.objects.create( @@ -107,7 +113,7 @@ def update_errata_tags(errata_data): rev=rfc.rev, # expect no rev by=system, type="sync_from_rfc_editor", - desc=summary + desc=summary, ) @@ -130,6 +136,7 @@ def update_errata_dirty_time() -> DirtyBits | None: log(f"Created DirtyBits(slug='{ERRATA_SLUG}')") return dirty_work + def mark_errata_as_processed(when: datetime.datetime): n_updated = DirtyBits.objects.filter( Q(processed_time__isnull=True) | Q(processed_time__lt=when), @@ -148,7 +155,7 @@ def errata_are_dirty(): # A None indicates we could not check the timestamp of errata.json. 
In that # case, we are not likely to be able to read the blob either, so don't try # to process it. An error was already logged. - return False + return False display_processed_time = ( dirty_work.processed_time.isoformat() if dirty_work.processed_time is not None diff --git a/k8s/settings_local.py b/k8s/settings_local.py index 3a7e524f5a..251f11234f 100644 --- a/k8s/settings_local.py +++ b/k8s/settings_local.py @@ -511,3 +511,8 @@ def _multiline_to_list(s): "DATATRACKER_ERRATA_METADATA_NOTIFICATION_URL must be set if " "DATATRACKER_ERRATA_METADATA_NOTIFICATION_API_KEY is provided" ) + +# name (with path) of errata.json in the red bucket +ERRATA_JSON_BLOB_NAME = os.environ.get( + "DATATRACKER_ERRATA_JSON_BLOB_NAME", "other/errata.json" +) From 1b2aced58e6fa43ff5c9579964ac4684fb87824f Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 15:00:35 -0300 Subject: [PATCH 06/12] fix: transaction --- ietf/sync/errata.py | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 0d154bf86e..51531a6190 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -6,6 +6,7 @@ from django.conf import settings from django.core.files.storage import storages +from django.db import transaction from django.db.models import Q from ietf.doc.models import Document, DocEvent @@ -96,25 +97,29 @@ def update_errata_tags(errata_data): changes[rfc]["remove"].append(tag_has_verified_errata) for rfc, changeset in changes.items(): - change_descs = [] - for tag in changeset["add"]: - rfc.tags.add(tag) - change_descs.append(f"added {tag.slug} tag") - for tag in changeset["remove"]: - rfc.tags.remove(tag) - change_descs.append(f"removed {tag.slug} tag") - summary = "Update from RFC Editor: " + ", ".join(change_descs) - if all( - er["errata_status_code"] == "Rejected" for er in errata_map[rfc.rfc_number] - ): - summary += " (all errata rejected)" - DocEvent.objects.create( - 
doc=rfc, - rev=rfc.rev, # expect no rev - by=system, - type="sync_from_rfc_editor", - desc=summary, - ) + # Update in a transaction per RFC to keep tags and DocEvents consistent. + # With this in place, an interrupted task will be cleanly completed on the + # next run. + with transaction.atomic(): + change_descs = [] + for tag in changeset["add"]: + rfc.tags.add(tag) + change_descs.append(f"added {tag.slug} tag") + for tag in changeset["remove"]: + rfc.tags.remove(tag) + change_descs.append(f"removed {tag.slug} tag") + summary = "Update from RFC Editor: " + ", ".join(change_descs) + if all( + er["errata_status_code"] == "Rejected" for er in errata_map[rfc.rfc_number] + ): + summary += " (all errata rejected)" + DocEvent.objects.create( + doc=rfc, + rev=rfc.rev, # expect no rev + by=system, + type="sync_from_rfc_editor", + desc=summary, + ) ## DirtyBits management for the errata tags From 5bdbdf1ea961542f9aa36e9286222cea37598c41 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 15:14:13 -0300 Subject: [PATCH 07/12] refactor: extract method from task --- ietf/sync/errata.py | 5 +++++ ietf/sync/tasks.py | 10 ++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 51531a6190..81a31e7484 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -175,3 +175,8 @@ def errata_are_dirty(): dirty_work.processed_time is None or dirty_work.dirty_time >= dirty_work.processed_time ) + + +def update_errata_from_rfceditor(): + errata_data = get_errata_data() + update_errata_tags(errata_data) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index ac0fe173ca..34b2efeb5c 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -17,8 +17,11 @@ from ietf.doc.tasks import rebuild_reference_relations_task from ietf.sync import iana from ietf.sync import rfceditor -from ietf.sync.errata import errata_are_dirty, mark_errata_as_processed, get_errata_data, \ - update_errata_tags +from 
ietf.sync.errata import ( + errata_are_dirty, + mark_errata_as_processed, + update_errata_from_rfceditor, +) from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue from ietf.sync.rfcindex import ( create_bcp_txt_index, @@ -296,8 +299,7 @@ def update_errata_from_rfceditor_task(): # new_processed_time is the *start* of processing so that any changes after # this point will trigger another refresh new_processed_time = timezone.now() - errata_data = get_errata_data() - update_errata_tags(errata_data) + update_errata_from_rfceditor() mark_errata_as_processed(new_processed_time) mark_rfcindex_as_dirty() # ensure any changes are reflected in the indexes From 2c385a0f95a991ff547152dba2f700e7a7c8e3ec Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 15:15:32 -0300 Subject: [PATCH 08/12] test: test_update_errata_from_rfceditor_task --- ietf/sync/tests.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index 21d6cb5cd5..6cbd91d59f 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -1,5 +1,4 @@ -# Copyright The IETF Trust 2012-2020, All Rights Reserved -# -*- coding: utf-8 -*- +# Copyright The IETF Trust 2012-2026, All Rights Reserved import os @@ -33,6 +32,7 @@ from ietf.person.factories import PersonFactory from ietf.person.models import Person from ietf.sync import iana, rfceditor, tasks +from ietf.sync.tasks import update_errata_from_rfceditor_task from ietf.utils.mail import outbox, empty_outbox from ietf.utils.test_utils import login_testing_unauthorized from ietf.utils.test_utils import TestCase @@ -1215,3 +1215,28 @@ def test_load_rfcs_into_blobdb_task( self.assertEqual(mock_kwargs, {}) + @mock.patch("ietf.sync.tasks.update_errata_from_rfceditor") + @mock.patch("ietf.sync.tasks.mark_rfcindex_as_dirty") + @mock.patch("ietf.sync.tasks.mark_errata_as_processed") + @mock.patch("ietf.sync.tasks.errata_are_dirty") + def 
test_update_errata_from_rfceditor_task( + self, + mock_errata_are_dirty, + mock_mark_errata_processed, + mock_mark_rfcindex_dirty, + mock_update, + ): + mock_errata_are_dirty.return_value = False + update_errata_from_rfceditor_task() + self.assertTrue(mock_errata_are_dirty.called) + self.assertFalse(mock_mark_errata_processed.called) + self.assertFalse(mock_mark_rfcindex_dirty.called) + self.assertFalse(mock_update.called) + + mock_errata_are_dirty.reset_mock() + mock_errata_are_dirty.return_value = True + update_errata_from_rfceditor_task() + self.assertTrue(mock_errata_are_dirty.called) + self.assertTrue(mock_mark_errata_processed.called) + self.assertTrue(mock_mark_rfcindex_dirty.called) + self.assertTrue(mock_update.called) From 3a4e248570e7373e0de8c2281af2df6d485919e1 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 15:24:12 -0300 Subject: [PATCH 09/12] refactor: slight reorg in errata.py --- ietf/sync/errata.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 81a31e7484..8a4c13c3a9 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -122,6 +122,11 @@ def update_errata_tags(errata_data): ) +def update_errata_from_rfceditor(): + errata_data = get_errata_data() + update_errata_tags(errata_data) + + ## DirtyBits management for the errata tags ERRATA_SLUG = DirtyBits.Slugs.ERRATA @@ -175,8 +180,3 @@ def errata_are_dirty(): dirty_work.processed_time is None or dirty_work.dirty_time >= dirty_work.processed_time ) - - -def update_errata_from_rfceditor(): - errata_data = get_errata_data() - update_errata_tags(errata_data) From a568046bbed4656005a64ce97e8f1e58b46ede9b Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 19:43:01 -0300 Subject: [PATCH 10/12] test: test errata module --- ietf/sync/tests.py | 193 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 192 insertions(+), 1 deletion(-) diff --git a/ietf/sync/tests.py 
b/ietf/sync/tests.py index 6cbd91d59f..eb70bcf9e9 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -12,6 +12,8 @@ from dataclasses import dataclass from django.conf import settings +from django.core.files.base import ContentFile +from django.core.files.storage import storages from django.urls import reverse as urlreverse from django.utils import timezone from django.test.utils import override_settings @@ -24,7 +26,7 @@ RfcFactory, DocumentAuthorFactory, DocEventFactory, - BcpFactory, + BcpFactory, WgRfcFactory, ) from ietf.doc.models import Document, DocEvent, DeletedEvent, DocTagName, RelatedDocument, State, StateDocEvent from ietf.doc.utils import add_state_change_event @@ -32,8 +34,12 @@ from ietf.person.factories import PersonFactory from ietf.person.models import Person from ietf.sync import iana, rfceditor, tasks +from ietf.sync.errata import update_errata_from_rfceditor, get_errata_last_updated, \ + get_errata_data, errata_map_from_json, update_errata_dirty_time, \ + mark_errata_as_processed, update_errata_tags from ietf.sync.tasks import update_errata_from_rfceditor_task from ietf.utils.mail import outbox, empty_outbox +from ietf.utils.models import DirtyBits from ietf.utils.test_utils import login_testing_unauthorized from ietf.utils.test_utils import TestCase from ietf.utils.timezone import date_today, RPC_TZINFO @@ -882,6 +888,191 @@ def test_rfceditor_undo(self): self.assertTrue(StateDocEvent.objects.filter(desc="First", doc=draft)) +class ErrataTests(TestCase): + @override_settings(ERRATA_JSON_BLOB_NAME="myblob.json") + def test_get_errata_last_update(self): + red_bucket = storages["red_bucket"] # InMemoryStorage in test + red_bucket.save("myblob.json", ContentFile("file")) + self.assertEqual( + get_errata_last_updated(), red_bucket.get_modified_time("myblob.json") + ) + + @override_settings(ERRATA_JSON_BLOB_NAME="myblob.json") + def test_get_errata_data(self): + red_bucket = storages["red_bucket"] # InMemoryStorage in test + 
red_bucket.save("myblob.json", ContentFile('[{"value": 3}]')) + self.assertEqual( + get_errata_data(), + [{"value": 3}], + ) + + def test_errata_map_from_json(self): + input_data = [ + { + "doc-id": "not-an-rfc", + "errata_status_code": "Verified", + }, + { + "doc-id": "rfc01234", + "errata_status_code": "Reported", + }, + { + "doc-id": "RFC1001", + "errata_status_code": "Verified" + }, + { + "doc-id": "RfC1234", + "errata_status_code": "Verified", + }, + ] + expected_output = {1001: [input_data[2]], 1234: [input_data[1], input_data[3]]} + self.assertDictEqual(errata_map_from_json(input_data), expected_output) + + @mock.patch("ietf.sync.errata.update_errata_tags") + @mock.patch("ietf.sync.errata.get_errata_data") + def test_update_errata_from_rfceditor(self, mock_get_data, mock_update): + fake_data = object() + mock_get_data.return_value = fake_data + update_errata_from_rfceditor() + self.assertTrue(mock_get_data.called) + self.assertTrue(mock_update.called) + self.assertEqual(mock_update.call_args, mock.call(fake_data)) + + def test_update_errata_tags(self): + tag_has_errata = DocTagName.objects.get(slug="errata") + tag_has_verified_errata = DocTagName.objects.get(slug="verified-errata") + + rfcs = WgRfcFactory.create_batch(10) + rfcs[0].tags.set([tag_has_errata]) + rfcs[1].tags.set([tag_has_errata, tag_has_verified_errata]) + rfcs[2].tags.set([tag_has_errata]) + rfcs[3].tags.set([tag_has_errata, tag_has_verified_errata]) + rfcs[4].tags.set([tag_has_errata]) + rfcs[5].tags.set([tag_has_errata, tag_has_verified_errata]) + + # Only contains the fields we care about, not the full JSON + errata_data = [ + # rfcs[0] had errata and should keep it + {"doc-id": rfcs[0].name, "errata_status_code": "Held for Document Update"}, + {"doc-id": rfcs[0].name, "errata_status_code": "Rejected"}, + # rfcs[1] had errata+verified-errata and should keep both + {"doc-id": rfcs[1].name, "errata_status_code": "Verified"}, + # rfcs[2] had errata and should gain verified-errata + {"doc-id": 
rfcs[2].name, "errata_status_code": "Verified"}, + # rfcs[3] had errata+verified errata and should lose both + {"doc-id": rfcs[3].name, "errata_status_code": "Rejected"}, + # rfcs[4] had errata and should gain verified-errata + {"doc-id": rfcs[4].name, "errata_status_code": "Verified"}, + {"doc-id": rfcs[4].name, "errata_status_code": "Reported"}, + # rfcs[5] had errata+verified-errata and should lose verified-errata + {"doc-id": rfcs[5].name, "errata_status_code": "Reported"}, + # rfcs[6] had none and should gain errata + {"doc-id": rfcs[6].name, "errata_status_code": "Reported"}, + # rfcs[7] had none and should gain errata+verified-errata + {"doc-id": rfcs[7].name, "errata_status_code": "Verified"}, + # rfcs[8] had none and it should stay that way + {"doc-id": rfcs[8].name, "errata_status_code": "Rejected"}, + # rfcs[9] had none and it should stay that way (no entry at all) + ] + update_errata_tags(errata_data) + + self.assertCountEqual(rfcs[0].tags.all(), [tag_has_errata]) + self.assertIsNone(rfcs[0].docevent_set.first()) # no change + + self.assertCountEqual( + rfcs[1].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertIsNone(rfcs[1].docevent_set.first()) # no change + + self.assertCountEqual( + rfcs[2].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertEqual(rfcs[2].docevent_set.count(), 1) + self.assertIn(": added verified-errata tag", rfcs[2].docevent_set.first().desc) + + self.assertCountEqual(rfcs[3].tags.all(), []) + self.assertEqual(rfcs[3].docevent_set.count(), 1) + self.assertIn( + ": removed errata tag, removed verified-errata tag (all errata rejected)", + rfcs[3].docevent_set.first().desc, + ) + + self.assertCountEqual( + rfcs[4].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertEqual(rfcs[4].docevent_set.count(), 1) + self.assertIn(": added verified-errata tag", rfcs[4].docevent_set.first().desc) + + self.assertCountEqual(rfcs[5].tags.all(), [tag_has_errata]) + 
self.assertEqual(rfcs[5].docevent_set.count(), 1) + self.assertIn( + ": removed verified-errata tag", rfcs[5].docevent_set.first().desc + ) + + self.assertCountEqual(rfcs[6].tags.all(), [tag_has_errata]) + self.assertEqual(rfcs[6].docevent_set.count(), 1) + self.assertIn(": added errata tag", rfcs[6].docevent_set.first().desc) + + self.assertCountEqual( + rfcs[7].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertEqual(rfcs[7].docevent_set.count(), 1) + self.assertIn( + ": added errata tag, added verified-errata tag", + rfcs[7].docevent_set.first().desc, + ) + + self.assertCountEqual(rfcs[8].tags.all(), []) + self.assertIsNone(rfcs[8].docevent_set.first()) # no change + + self.assertCountEqual(rfcs[9].tags.all(), []) + self.assertIsNone(rfcs[9].docevent_set.first()) # no change + + @override_settings(ERRATA_JSON_BLOB_NAME="myblob.json") + @mock.patch("ietf.sync.errata.get_errata_last_updated") + def test_update_errata_dirty_time(self, mock_last_updated): + ERRATA_SLUG = DirtyBits.Slugs.ERRATA + + # No time available + mock_last_updated.side_effect = FileNotFoundError + self.assertIsNone(DirtyBits.objects.filter(slug=ERRATA_SLUG).first()) + self.assertIsNone(update_errata_dirty_time()) # no blob yet + self.assertIsNone(DirtyBits.objects.filter(slug=ERRATA_SLUG).first()) + + # Now set a time + first_timestamp = timezone.now() - datetime.timedelta(hours=3) + mock_last_updated.return_value = first_timestamp + mock_last_updated.side_effect = None + result = update_errata_dirty_time() + self.assertTrue(isinstance(result, DirtyBits)) + result.refresh_from_db() + self.assertEqual(result.slug, ERRATA_SLUG) + self.assertEqual(result.processed_time, None) + self.assertEqual(result.dirty_time, first_timestamp) + + # Update the time + second_timestamp = timezone.now() + mock_last_updated.return_value = second_timestamp + second_result = update_errata_dirty_time() + self.assertEqual(result.pk, second_result.pk) # should be the same record + 
result.refresh_from_db() + self.assertEqual(result.slug, ERRATA_SLUG) + self.assertEqual(result.processed_time, None) + self.assertEqual(result.dirty_time, second_timestamp) + + def test_mark_errata_as_processed(self): + ERRATA_SLUG = DirtyBits.Slugs.ERRATA + first_timestamp = timezone.now() + mark_errata_as_processed(first_timestamp) # no DirtyBits is not an error + self.assertIsNone(DirtyBits.objects.filter(slug=ERRATA_SLUG).first()) + dbits = DirtyBits.objects.create(slug=ERRATA_SLUG, dirty_time=first_timestamp) + second_timestamp = timezone.now() + mark_errata_as_processed(second_timestamp) + dbits.refresh_from_db() + self.assertEqual(dbits.dirty_time, first_timestamp) + self.assertEqual(dbits.processed_time, second_timestamp) + + class TaskTests(TestCase): @override_settings( RFC_EDITOR_INDEX_URL="https://rfc-editor.example.com/index/", From 96e227b89b349e67c65e6f7149577974ed3021dd Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 19:43:17 -0300 Subject: [PATCH 11/12] fix: guard map lookup --- ietf/sync/errata.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 8a4c13c3a9..9f71cf111b 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -109,8 +109,9 @@ def update_errata_tags(errata_data): rfc.tags.remove(tag) change_descs.append(f"removed {tag.slug} tag") summary = "Update from RFC Editor: " + ", ".join(change_descs) - if all( - er["errata_status_code"] == "Rejected" for er in errata_map[rfc.rfc_number] + if rfc.rfc_number in errata_map and all( + er["errata_status_code"] == "Rejected" + for er in errata_map[rfc.rfc_number] ): summary += " (all errata rejected)" DocEvent.objects.create( From a34917937cafd0637a22e60e06e1dbcad72dcc53 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 20:04:06 -0300 Subject: [PATCH 12/12] fix: lint / typing --- ietf/sync/errata.py | 9 +++++---- ietf/sync/tests.py | 22 ++++++++++++++++++---- 2 files changed, 23 
insertions(+), 8 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 9f71cf111b..113d987291 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -2,7 +2,7 @@ import datetime import json from collections import defaultdict -from typing import DefaultDict, Literal +from typing import DefaultDict from django.conf import settings from django.core.files.storage import storages @@ -18,6 +18,7 @@ DEFAULT_ERRATA_JSON_BLOB_NAME = "other/errata.json" +type ErrataJsonEntry = dict[str, str] def get_errata_last_updated() -> datetime.datetime: """Get timestamp of the last errata.json update @@ -30,7 +31,7 @@ def get_errata_last_updated() -> datetime.datetime: ) -def get_errata_data(): +def get_errata_data() -> list[ErrataJsonEntry]: red_bucket = storages["red_bucket"] with red_bucket.open( getattr(settings, "ERRATA_JSON_BLOB_NAME", DEFAULT_ERRATA_JSON_BLOB_NAME), "r" @@ -39,7 +40,7 @@ def get_errata_data(): return errata_data -def errata_map_from_json(errata_data): +def errata_map_from_json(errata_data: list[ErrataJsonEntry]): """Create a dict mapping RFC number to a list of applicable errata records""" errata = defaultdict(list) for item in errata_data: @@ -50,7 +51,7 @@ def errata_map_from_json(errata_data): return dict(errata) -def update_errata_tags(errata_data): +def update_errata_tags(errata_data: list[ErrataJsonEntry]): tag_has_errata = DocTagName.objects.get(slug="errata") tag_has_verified_errata = DocTagName.objects.get(slug="verified-errata") system = Person.objects.get(name="(System)") diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index eb70bcf9e9..e83b6a5e0a 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -28,15 +28,29 @@ DocEventFactory, BcpFactory, WgRfcFactory, ) -from ietf.doc.models import Document, DocEvent, DeletedEvent, DocTagName, RelatedDocument, State, StateDocEvent +from ietf.doc.models import ( + Document, + DocEvent, + DeletedEvent, + DocTagName, + RelatedDocument, + State, + StateDocEvent, +) from 
ietf.doc.utils import add_state_change_event from ietf.group.factories import GroupFactory from ietf.person.factories import PersonFactory from ietf.person.models import Person from ietf.sync import iana, rfceditor, tasks -from ietf.sync.errata import update_errata_from_rfceditor, get_errata_last_updated, \ - get_errata_data, errata_map_from_json, update_errata_dirty_time, \ - mark_errata_as_processed, update_errata_tags +from ietf.sync.errata import ( + update_errata_from_rfceditor, + get_errata_last_updated, + get_errata_data, + errata_map_from_json, + update_errata_dirty_time, + mark_errata_as_processed, + update_errata_tags, +) from ietf.sync.tasks import update_errata_from_rfceditor_task from ietf.utils.mail import outbox, empty_outbox from ietf.utils.models import DirtyBits