From 73132b34f6dc4088a5a27977f68d8a05cbc38e03 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Mon, 13 Apr 2026 14:38:42 -0300 Subject: [PATCH 01/27] fix: resolve procmaterials when uploaded (#10693) * fix: resolve procmaterials when uploaded * fix: avoid leaving stale ResolvedMaterial --- ietf/meeting/utils.py | 16 ++++++++++++++-- ietf/meeting/views_proceedings.py | 4 ++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/ietf/meeting/utils.py b/ietf/meeting/utils.py index bdf3d3d3d3..10ae0d3667 100644 --- a/ietf/meeting/utils.py +++ b/ietf/meeting/utils.py @@ -1025,9 +1025,18 @@ def resolve_materials_for_one_meeting(meeting: Meeting): ) def resolve_uploaded_material(meeting: Meeting, doc: Document): - resolved = [] + resolved: list[ResolvedMaterial] = [] + remove = ResolvedMaterial.objects.none() blob = resolve_one_material(doc, rev=None, ext=None) - if blob is not None: + if blob is None: + # Versionless file does not exist. Remove the versionless ResolvedMaterial + # if it existed. This is to avoid leaving behind a stale link to a replaced + # version. This comes up e.g. if a ProceedingsMaterial is changed from having + # an uploaded file to being an external URL. 
+ remove = ResolvedMaterial.objects.filter( + name=doc.name, meeting_number=meeting.number + ) + else: resolved.append( ResolvedMaterial( name=doc.name, @@ -1047,12 +1056,15 @@ def resolve_uploaded_material(meeting: Meeting, doc: Document): blob=blob.name, ) ) + # Create the new record(s) ResolvedMaterial.objects.bulk_create( resolved, update_conflicts=True, unique_fields=["name", "meeting_number"], update_fields=["bucket", "blob"], ) + # and remove one if necessary (will be a none() queryset if not) + remove.delete() def store_blob_for_one_material_file(doc: Document, rev: str, filepath: Path): diff --git a/ietf/meeting/views_proceedings.py b/ietf/meeting/views_proceedings.py index d1169bff2d..639efa1da4 100644 --- a/ietf/meeting/views_proceedings.py +++ b/ietf/meeting/views_proceedings.py @@ -14,7 +14,7 @@ from ietf.meeting.models import Meeting, MeetingHost from ietf.meeting.helpers import get_meeting from ietf.name.models import ProceedingsMaterialTypeName -from ietf.meeting.utils import handle_upload_file +from ietf.meeting.utils import handle_upload_file, resolve_uploaded_material from ietf.utils.text import xslugify class UploadProceedingsMaterialForm(FileUploadForm): @@ -150,7 +150,7 @@ def save_proceedings_material_doc(meeting, material_type, title, request, file=N if events: doc.save_with_history(events) - + resolve_uploaded_material(meeting, doc) return doc From 3e23f162c4929703a630141a343d820252b29a83 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Mon, 13 Apr 2026 13:26:38 -0500 Subject: [PATCH 02/27] chore: git ignore .claude (#10705) --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 84bc800e3b..ccc7a46b08 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .DS_store datatracker.sublime-project datatracker.sublime-workspace +/.claude /.coverage /.factoryboy_random_state /.mypy_cache From dee75e2e67bcb47b2dc1e556cc34ecd08dfa6806 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Mon, 13 Apr 2026 
15:37:33 -0300 Subject: [PATCH 03/27] feat: unmask exceptions on blob retrieve error (#10704) These have not been encountered in at least three months, likely longer. Still remains to unmask exceptions when storing blobs. We are probably ready to do that, but log noise caused by expected failures makes it hard to be certain. Soon... --- ietf/doc/storage_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ietf/doc/storage_utils.py b/ietf/doc/storage_utils.py index 81588c83ec..ffdd4599be 100644 --- a/ietf/doc/storage_utils.py +++ b/ietf/doc/storage_utils.py @@ -178,8 +178,7 @@ def retrieve_bytes(kind: str, name: str) -> bytes: content = f.read() except Exception as err: log(f"Blobstore Error: Failed to read bytes from {kind}:{name}: {repr(err)}") - if settings.SERVER_MODE == "development": - raise + raise return content @@ -192,6 +191,5 @@ def retrieve_str(kind: str, name: str) -> str: content = content_bytes.decode("utf-8") except Exception as err: log(f"Blobstore Error: Failed to read string from {kind}:{name}: {repr(err)}") - if settings.SERVER_MODE == "development": - raise + raise return content From aa847b4b8ae292e60f9d8f96bc23904c156cda47 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Mon, 13 Apr 2026 14:15:06 -0500 Subject: [PATCH 04/27] fix: look in target if draft name not found in anchor (#10636) --- ietf/utils/xmldraft.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ietf/utils/xmldraft.py b/ietf/utils/xmldraft.py index f555a0a16a..325b8499a9 100644 --- a/ietf/utils/xmldraft.py +++ b/ietf/utils/xmldraft.py @@ -102,6 +102,17 @@ def _document_name(self, ref): number = int(maybe_number) return f"{label}{number}" + target = ref.get("target") + if isinstance(target, str): + target = target.lower() + if target.startswith("https://datatracker.ietf.org/doc/"): + # len("https://datatracker.ietf.org/doc/")==33 + m = re.match(r"^(draft-[a-z0-9-]*[a-z0-9])([/-]\d{2})?/?$",target[33:]) + if m: + name = m.group(1) + 
return name + + # if we couldn't find a match so far, try the seriesInfo series_query = " or ".join(f"@name='{x.upper()}'" for x in series) for info in ref.xpath( From 60ecfa2029f81b6a6b987770a1edd4840a6acf02 Mon Sep 17 00:00:00 2001 From: NGPixel Date: Mon, 13 Apr 2026 16:43:16 -0400 Subject: [PATCH 05/27] chore(k8s): remove interpod affinity --- k8s/auth.yaml | 10 ---------- k8s/beat.yaml | 10 ---------- k8s/celery.yaml | 10 ---------- k8s/memcached.yaml | 10 ---------- k8s/rabbitmq.yaml | 10 ---------- k8s/replicator.yaml | 10 ---------- 6 files changed, 60 deletions(-) diff --git a/k8s/auth.yaml b/k8s/auth.yaml index 392e306b54..2bdb064447 100644 --- a/k8s/auth.yaml +++ b/k8s/auth.yaml @@ -15,16 +15,6 @@ spec: labels: app: auth spec: - affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - datatracker - topologyKey: "kubernetes.io/hostname" securityContext: runAsNonRoot: true containers: diff --git a/k8s/beat.yaml b/k8s/beat.yaml index cc98beecf6..9ab242681c 100644 --- a/k8s/beat.yaml +++ b/k8s/beat.yaml @@ -17,16 +17,6 @@ spec: labels: app: beat spec: - affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - datatracker - topologyKey: "kubernetes.io/hostname" securityContext: runAsNonRoot: true containers: diff --git a/k8s/celery.yaml b/k8s/celery.yaml index a2799f2a6d..2f4c0fd439 100644 --- a/k8s/celery.yaml +++ b/k8s/celery.yaml @@ -17,16 +17,6 @@ spec: labels: app: celery spec: - affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - datatracker - topologyKey: "kubernetes.io/hostname" securityContext: runAsNonRoot: true containers: diff --git a/k8s/memcached.yaml b/k8s/memcached.yaml index 8f73f3d0d5..5a4c9f0aed 100644 --- a/k8s/memcached.yaml +++ 
b/k8s/memcached.yaml @@ -13,16 +13,6 @@ spec: labels: app: memcached spec: - affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - datatracker - topologyKey: "kubernetes.io/hostname" securityContext: runAsNonRoot: true containers: diff --git a/k8s/rabbitmq.yaml b/k8s/rabbitmq.yaml index 780a399239..346b54c93e 100644 --- a/k8s/rabbitmq.yaml +++ b/k8s/rabbitmq.yaml @@ -13,16 +13,6 @@ spec: labels: app: rabbitmq spec: - affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - datatracker - topologyKey: "kubernetes.io/hostname" securityContext: runAsNonRoot: true containers: diff --git a/k8s/replicator.yaml b/k8s/replicator.yaml index 9c462bd96b..a28d9e8a16 100644 --- a/k8s/replicator.yaml +++ b/k8s/replicator.yaml @@ -17,16 +17,6 @@ spec: labels: app: replicator spec: - affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - datatracker - topologyKey: "kubernetes.io/hostname" securityContext: runAsNonRoot: true containers: From 1689b88859e7c7fc49de4c96c9ac3eec7c239e50 Mon Sep 17 00:00:00 2001 From: Kesara Rathnayake Date: Tue, 14 Apr 2026 16:06:34 +1200 Subject: [PATCH 06/27] fix: Wire bcp, std, fyi index creation (#10706) * style: Ruff Ruff * fix: Wire bcp and std index creation tasks * fix: Use single task and add fyi index creation * fix: Log index creation errors and continue --- ietf/api/views_rpc.py | 21 ++++++----- ietf/sync/tasks.py | 82 +++++++++++++++++++++++++++++++------------ 2 files changed, 72 insertions(+), 31 deletions(-) diff --git a/ietf/api/views_rpc.py b/ietf/api/views_rpc.py index 1e96118e58..25982d76bf 100644 --- a/ietf/api/views_rpc.py +++ b/ietf/api/views_rpc.py @@ -32,7 +32,9 @@ EmailPersonSerializer, RfcWithAuthorsSerializer, 
DraftWithAuthorsSerializer, - NotificationAckSerializer, RfcPubSerializer, RfcFileSerializer, + NotificationAckSerializer, + RfcPubSerializer, + RfcFileSerializer, EditableRfcSerializer, ) from ietf.doc.models import Document, DocHistory, RfcAuthor, DocEvent @@ -344,9 +346,10 @@ def post(self, request): class RfcAuthorViewSet(viewsets.ReadOnlyModelViewSet): """ViewSet for RfcAuthor model - + Router needs to provide rfc_number as a kwarg """ + api_key_endpoint = "ietf.api.views_rpc" queryset = RfcAuthor.objects.all() @@ -407,7 +410,7 @@ class RfcPubFilesView(APIView): def _fs_destination(self, filename: str | Path) -> Path: """Destination for an uploaded RFC file in the filesystem - + Strips any path components in filename and returns an absolute Path. """ rfc_path = Path(settings.RFC_PATH) @@ -419,7 +422,7 @@ def _fs_destination(self, filename: str | Path) -> Path: def _blob_destination(self, filename: str | Path) -> str: """Destination name for an uploaded RFC file in the blob store - + Strips any path components in filename and returns an absolute Path. 
""" filename = Path(filename) # could potentially have directory components @@ -472,9 +475,7 @@ def post(self, request): code="files-exist", ) for possible_existing_blob in possible_rfc_blobs: - if exists_in_storage( - kind=blob_kind, name=possible_existing_blob - ): + if exists_in_storage(kind=blob_kind, name=possible_existing_blob): raise Conflict( "Blob(s) already exist for this RFC", code="blobs-exist", @@ -523,7 +524,9 @@ def post(self, request): # Trigger red precomputer needs_updating = [rfc.rfc_number] - for rel in rfc.relateddocument_set.filter(relationship_id__in=["obs","updates"]): + for rel in rfc.relateddocument_set.filter( + relationship_id__in=["obs", "updates"] + ): needs_updating.append(rel.target.rfc_number) trigger_red_precomputer_task.delay(rfc_number_list=sorted(needs_updating)) # Trigger search index update @@ -540,7 +543,7 @@ class RfcIndexView(APIView): @extend_schema( operation_id="refresh_rfc_index", summary="Refresh rfc-index files", - description="Requests creation of rfc-index.xml and rfc-index.txt files", + description="Requests creation of various index files.", responses={202: None}, request=None, ) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 4c84dc581e..c48368cccd 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -18,7 +18,13 @@ from ietf.sync import iana from ietf.sync import rfceditor from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue -from ietf.sync.rfcindex import create_rfc_txt_index, create_rfc_xml_index +from ietf.sync.rfcindex import ( + create_bcp_txt_index, + create_fyi_txt_index, + create_rfc_txt_index, + create_rfc_xml_index, + create_std_txt_index, +) from ietf.sync.utils import build_from_file_content, load_rfcs_into_blobdb, rsync_helper from ietf.utils import log from ietf.utils.timezone import date_today @@ -27,13 +33,13 @@ @shared_task def rfc_editor_index_update_task(full_index=False): """Update metadata from the RFC index - + Default is to examine only 
changes in the past 365 days. Call with full_index=True to update the full RFC index. - + According to comments on the original script, a year's worth took about 20s on production as of August 2022 - + The original rfc-editor-index-update script had a long-disabled provision for running the rebuild_reference_relations scripts after the update. That has not been brought over at all because it should be implemented as its own task if it is needed. @@ -51,7 +57,7 @@ def rfc_editor_index_update_task(full_index=False): timeout=30, # seconds ) except requests.Timeout as exc: - log.log(f'GET request timed out retrieving RFC editor index: {exc}') + log.log(f"GET request timed out retrieving RFC editor index: {exc}") return # failed rfc_index_xml = response.text index_data = rfceditor.parse_index(io.StringIO(rfc_index_xml)) @@ -61,9 +67,9 @@ def rfc_editor_index_update_task(full_index=False): timeout=30, # seconds ) except requests.Timeout as exc: - log.log(f'GET request timed out retrieving RFC editor errata: {exc}') + log.log(f"GET request timed out retrieving RFC editor errata: {exc}") return # failed - errata_data = response.json() + errata_data = response.json() if len(index_data) < rfceditor.MIN_INDEX_RESULTS: log.log("Not enough index entries, only %s" % len(index_data)) return # failed @@ -96,15 +102,15 @@ def rfc_editor_queue_updates_task(): drafts, warnings = parse_queue(io.StringIO(response.text)) for w in warnings: log.log(f"Warning: {w}") - + if len(drafts) < MIN_QUEUE_RESULTS: log.log("Not enough results, only %s" % len(drafts)) return # failed - + changed, warnings = update_drafts_from_queue(drafts) for w in warnings: log.log(f"Warning: {w}") - + for c in changed: log.log(f"Updated {c}") @@ -120,9 +126,11 @@ def iana_changes_update_task(): MAX_INTERVAL_ACCEPTED_BY_IANA = datetime.timedelta(hours=23) start = ( - timezone.now() - - datetime.timedelta(hours=23) - + datetime.timedelta(seconds=CLOCK_SKEW_COMPENSATION,) + timezone.now() + - 
datetime.timedelta(hours=23) + + datetime.timedelta( + seconds=CLOCK_SKEW_COMPENSATION, + ) ) end = start + datetime.timedelta(hours=23) @@ -133,7 +141,9 @@ def iana_changes_update_task(): # requests if necessary text = iana.fetch_changes_json( - settings.IANA_SYNC_CHANGES_URL, t, min(end, t + MAX_INTERVAL_ACCEPTED_BY_IANA) + settings.IANA_SYNC_CHANGES_URL, + t, + min(end, t + MAX_INTERVAL_ACCEPTED_BY_IANA), ) log.log(f"Retrieved the JSON: {text}") @@ -159,9 +169,9 @@ def iana_protocols_update_task(): # "this needs to be the date where this tool is first deployed" in the original # iana-protocols-updates script)" rfc_must_published_later_than = datetime.datetime( - 2012, - 11, - 26, + 2012, + 11, + 26, tzinfo=datetime.UTC, ) @@ -171,17 +181,17 @@ def iana_protocols_update_task(): timeout=30, ) except requests.Timeout as exc: - log.log(f'GET request timed out retrieving IANA protocols page: {exc}') + log.log(f"GET request timed out retrieving IANA protocols page: {exc}") return rfc_numbers = iana.parse_protocol_page(response.text) def batched(l, n): """Split list l up in batches of max size n. 
- + For Python 3.12 or later, replace this with itertools.batched() """ - return (l[i:i + n] for i in range(0, len(l), n)) + return (l[i : i + n] for i in range(0, len(l), n)) for batch in batched(rfc_numbers, 100): updated = iana.update_rfc_log_from_protocol_page( @@ -192,6 +202,7 @@ def batched(l, n): for d in updated: log.log("Added history entry for %s" % d.display_name()) + @shared_task def fix_subseries_docevents_task(): """Repairs DocEvents related to bugs around removing docs from subseries @@ -233,6 +244,7 @@ def fix_subseries_docevents_task(): time=obsoleting_time ) + @shared_task def rsync_rfcs_from_rfceditor_task(rfc_numbers: list[int]): log.log(f"Rsyncing rfcs from rfc-editor: {rfc_numbers}") @@ -277,6 +289,32 @@ def load_rfcs_into_blobdb_task(start: int, end: int): @shared_task def create_rfc_index_task(): - create_rfc_txt_index() - create_rfc_xml_index() + try: + create_rfc_txt_index() + except Exception as e: + log.log(f"Error: failure in creating rfc-index.txt. {e}") + pass + try: + create_rfc_xml_index() + except Exception as e: + log.log(f"Error: failure in creating rfc-index.xml. {e}") + pass + + try: + create_bcp_txt_index() + except Exception as e: + log.log(f"Error: failure in creating bcp-index.txt. {e}") + pass + + try: + create_std_txt_index() + except Exception as e: + log.log(f"Error: failure in creating std-index.txt. {e}") + pass + + try: + create_fyi_txt_index() + except Exception as e: + log.log(f"Error: failure in creating fyi-index.txt. 
{e}") + pass From c66655fb5745bfb8657edcae534f96056a7d4ff0 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 13:33:45 -0300 Subject: [PATCH 07/27] feat: debounce rfc index refresh (#10708) * feat: DirtyBits model + admin * style: ruff + modernize utils/admin.py * chore: remove unused code * feat: rfcindex DirtyBits helpers * feat: refresh RFC index only when dirty Renames the task to better match its purpose * test: update test * fix: typo * chore: add (empty) DirtyBits resource * fix: actually call mark_rfcindex_as_processed --- ietf/api/tests_views_rpc.py | 17 +++++-- ietf/api/views_rpc.py | 4 +- ietf/sync/rfcindex.py | 50 ++++++++++++++++++ ietf/sync/tasks.py | 68 ++++++++++++++----------- ietf/utils/admin.py | 65 +++++------------------ ietf/utils/migrations/0003_dirtybits.py | 37 ++++++++++++++ ietf/utils/models.py | 25 ++++++++- ietf/utils/resources.py | 12 +++-- 8 files changed, 183 insertions(+), 95 deletions(-) create mode 100644 ietf/utils/migrations/0003_dirtybits.py diff --git a/ietf/api/tests_views_rpc.py b/ietf/api/tests_views_rpc.py index 0db67e126f..180221cffc 100644 --- a/ietf/api/tests_views_rpc.py +++ b/ietf/api/tests_views_rpc.py @@ -1,4 +1,5 @@ # Copyright The IETF Trust 2025, All Rights Reserved +import datetime from io import StringIO from pathlib import Path from tempfile import TemporaryDirectory @@ -10,12 +11,15 @@ from django.test.utils import override_settings from django.urls import reverse as urlreverse import mock +from django.utils import timezone from ietf.blobdb.models import Blob from ietf.doc.factories import IndividualDraftFactory, RfcFactory, WgDraftFactory, WgRfcFactory from ietf.doc.models import RelatedDocument, Document from ietf.group.factories import RoleFactory, GroupFactory from ietf.person.factories import PersonFactory +from ietf.sync.rfcindex import rfcindex_is_dirty +from ietf.utils.models import DirtyBits from ietf.utils.test_utils import APITestCase, reload_db_objects @@ -408,8 +412,13 @@ def 
_valid_post_data(): ) @override_settings(APP_API_TOKENS={"ietf.api.views_rpc": ["valid-token"]}) - @mock.patch("ietf.api.views_rpc.create_rfc_index_task") - def test_refresh_rfc_index(self, mock_task): + def test_refresh_rfc_index(self): + DirtyBits.objects.create( + slug=DirtyBits.Slugs.RFCINDEX, + dirty_time=timezone.now() - datetime.timedelta(days=1), + processed_time=timezone.now() - datetime.timedelta(hours=12), + ) + self.assertFalse(rfcindex_is_dirty()) url = urlreverse("ietf.api.purple_api.refresh_rfc_index") response = self.client.get(url) self.assertEqual(response.status_code, 403) @@ -417,7 +426,7 @@ def test_refresh_rfc_index(self, mock_task): self.assertEqual(response.status_code, 403) response = self.client.get(url, headers={"X-Api-Key": "valid-token"}) self.assertEqual(response.status_code, 405) - self.assertFalse(mock_task.delay.called) + self.assertFalse(rfcindex_is_dirty()) response = self.client.post(url, headers={"X-Api-Key": "valid-token"}) self.assertEqual(response.status_code, 202) - self.assertTrue(mock_task.delay.called) + self.assertTrue(rfcindex_is_dirty()) diff --git a/ietf/api/views_rpc.py b/ietf/api/views_rpc.py index 25982d76bf..6bc45fe3da 100644 --- a/ietf/api/views_rpc.py +++ b/ietf/api/views_rpc.py @@ -47,7 +47,7 @@ update_rfc_searchindex_task, ) from ietf.person.models import Email, Person -from ietf.sync.tasks import create_rfc_index_task +from ietf.sync.rfcindex import mark_rfcindex_as_dirty class Conflict(APIException): @@ -548,5 +548,5 @@ class RfcIndexView(APIView): request=None, ) def post(self, request): - create_rfc_index_task.delay() + mark_rfcindex_as_dirty() return Response(status=202) diff --git a/ietf/sync/rfcindex.py b/ietf/sync/rfcindex.py index 0ea6fb939f..6864617874 100644 --- a/ietf/sync/rfcindex.py +++ b/ietf/sync/rfcindex.py @@ -1,4 +1,5 @@ # Copyright The IETF Trust 2026, All Rights Reserved +import datetime import json from collections import defaultdict from collections.abc import Container @@ -11,6 +12,7 @@ 
from django.conf import settings from django.core.files.base import ContentFile +from django.db.models import Q from lxml import etree from django.core.files.storage import storages @@ -22,6 +24,7 @@ from ietf.doc.models import Document from ietf.name.models import StdLevelName from ietf.utils.log import log +from ietf.utils.models import DirtyBits FORMATS_FOR_INDEX = ["txt", "html", "pdf", "xml", "ps"] SS_TXT_MARGIN = 3 @@ -739,3 +742,50 @@ def create_fyi_txt_index(): }, ) save_to_red_bucket("fyi-index.txt", index) + + +## DirtyBits management for the RFC index + +RFCINDEX_SLUG = DirtyBits.Slugs.RFCINDEX + + +def mark_rfcindex_as_dirty(): + _, created = DirtyBits.objects.update_or_create( + slug=RFCINDEX_SLUG, defaults={"dirty_time": timezone.now()} + ) + if created: + log(f"Created DirtyBits(slug='{RFCINDEX_SLUG}')") + + +def mark_rfcindex_as_processed(when: datetime.datetime): + n_updated = DirtyBits.objects.filter( + Q(processed_time__isnull=True) | Q(processed_time__lt=when), + slug=RFCINDEX_SLUG, + ).update(processed_time=when) + if n_updated > 0: + log(f"processed_time is now {when.isoformat()}") + else: + log("processed_time not updated, no matching record found") + + +def rfcindex_is_dirty(): + """Does the rfc index need to be updated?""" + dirty_work, created = DirtyBits.objects.get_or_create( + slug=RFCINDEX_SLUG, defaults={"dirty_time": timezone.now()} + ) + if created: + log(f"Created DirtyBits(slug='{RFCINDEX_SLUG}')") + display_processed_time = ( + dirty_work.processed_time.isoformat() + if dirty_work.processed_time is not None + else "never" + ) + log( + f"DirtyBits(slug='{RFCINDEX_SLUG}'): " + f"dirty_time={dirty_work.dirty_time.isoformat()} " + f"processed_time={display_processed_time}" + ) + return ( + dirty_work.processed_time is None + or dirty_work.dirty_time >= dirty_work.processed_time + ) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index c48368cccd..2805f431bf 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -24,6 +24,7 
@@ create_rfc_txt_index, create_rfc_xml_index, create_std_txt_index, + rfcindex_is_dirty, mark_rfcindex_as_processed, ) from ietf.sync.utils import build_from_file_content, load_rfcs_into_blobdb, rsync_helper from ietf.utils import log @@ -288,33 +289,40 @@ def load_rfcs_into_blobdb_task(start: int, end: int): @shared_task -def create_rfc_index_task(): - try: - create_rfc_txt_index() - except Exception as e: - log.log(f"Error: failure in creating rfc-index.txt. {e}") - pass - - try: - create_rfc_xml_index() - except Exception as e: - log.log(f"Error: failure in creating rfc-index.xml. {e}") - pass - - try: - create_bcp_txt_index() - except Exception as e: - log.log(f"Error: failure in creating bcp-index.txt. {e}") - pass - - try: - create_std_txt_index() - except Exception as e: - log.log(f"Error: failure in creating std-index.txt. {e}") - pass - - try: - create_fyi_txt_index() - except Exception as e: - log.log(f"Error: failure in creating fyi-index.txt. {e}") - pass +def refresh_rfc_index_task(): + if rfcindex_is_dirty(): + # new_processed_time is the *start* of processing so that any changes after + # this point will trigger another refresh + new_processed_time = timezone.now() + + try: + create_rfc_txt_index() + except Exception as e: + log.log(f"Error: failure in creating rfc-index.txt. {e}") + pass + + try: + create_rfc_xml_index() + except Exception as e: + log.log(f"Error: failure in creating rfc-index.xml. {e}") + pass + + try: + create_bcp_txt_index() + except Exception as e: + log.log(f"Error: failure in creating bcp-index.txt. {e}") + pass + + try: + create_std_txt_index() + except Exception as e: + log.log(f"Error: failure in creating std-index.txt. {e}") + pass + + try: + create_fyi_txt_index() + except Exception as e: + log.log(f"Error: failure in creating fyi-index.txt. 
{e}") + pass + + mark_rfcindex_as_processed(new_processed_time) diff --git a/ietf/utils/admin.py b/ietf/utils/admin.py index e6324ad7cd..cb8841cdc6 100644 --- a/ietf/utils/admin.py +++ b/ietf/utils/admin.py @@ -1,71 +1,30 @@ -# Copyright The IETF Trust 2011-2020, All Rights Reserved -# -*- coding: utf-8 -*- +# Copyright The IETF Trust 2011-2026, All Rights Reserved from django.contrib import admin -from django.utils.encoding import force_str - -def name(obj): - if hasattr(obj, 'abbrev'): - return obj.abbrev() - elif hasattr(obj, 'name'): - if callable(obj.name): - name = obj.name() - else: - name = force_str(obj.name) - if name: - return name - return str(obj) - -def admin_link(field, label=None, ordering="", display=name, suffix=""): - if not label: - label = field.capitalize().replace("_", " ").strip() - if ordering == "": - ordering = field - def _link(self): - obj = self - for attr in field.split("__"): - obj = getattr(obj, attr) - if callable(obj): - obj = obj() - if hasattr(obj, "all"): - objects = obj.all() - elif callable(obj): - objects = obj() - if not hasattr(objects, "__iter__"): - objects = [ objects ] - elif hasattr(obj, "__iter__"): - objects = obj - else: - objects = [ obj ] - chunks = [] - for obj in objects: - app = obj._meta.app_label - model = obj.__class__.__name__.lower() - id = obj.pk - chunks += [ '%(display)s' % - {'app':app, "model": model, "id":id, "display": display(obj), "suffix":suffix, } ] - return ", ".join(chunks) - _link.allow_tags = True - _link.short_description = label - _link.admin_order_field = ordering - return _link +from .models import DumpInfo, DirtyBits class SaferStackedInline(admin.StackedInline): """StackedInline without delete by default""" + can_delete = False # no delete button show_change_link = True # show a link to the resource (where it can be deleted) class SaferTabularInline(admin.TabularInline): """TabularInline without delete by default""" + can_delete = False # no delete button show_change_link = True # 
show a link to the resource (where it can be deleted) -from .models import DumpInfo +@admin.register(DumpInfo) class DumpInfoAdmin(admin.ModelAdmin): - list_display = ['date', 'host', 'tz'] - list_filter = ['date'] -admin.site.register(DumpInfo, DumpInfoAdmin) + list_display = ["date", "host", "tz"] + list_filter = ["date"] + + +@admin.register(DirtyBits) +class DirtyBitsAdmin(admin.ModelAdmin): + list_display = ["slug", "dirty_time", "processed_time"] diff --git a/ietf/utils/migrations/0003_dirtybits.py b/ietf/utils/migrations/0003_dirtybits.py new file mode 100644 index 0000000000..11f6ed09f6 --- /dev/null +++ b/ietf/utils/migrations/0003_dirtybits.py @@ -0,0 +1,37 @@ +# Copyright The IETF Trust 2026, All Rights Reserved + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("utils", "0002_delete_versioninfo"), + ] + + operations = [ + migrations.CreateModel( + name="DirtyBits", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "slug", + models.CharField( + choices=[("rfcindex", "RFC Index")], max_length=40, unique=True + ), + ), + ("dirty_time", models.DateTimeField(blank=True, null=True)), + ("processed_time", models.DateTimeField(blank=True, null=True)), + ], + options={ + "verbose_name_plural": "dirty bits", + }, + ), + ] diff --git a/ietf/utils/models.py b/ietf/utils/models.py index 21af5766e9..13afbdfe20 100644 --- a/ietf/utils/models.py +++ b/ietf/utils/models.py @@ -1,14 +1,35 @@ -# Copyright The IETF Trust 2015-2020, All Rights Reserved +# Copyright The IETF Trust 2015-2026, All Rights Reserved import itertools from django.db import models + +class DirtyBits(models.Model): + """A weak semaphore mechanism for coordination with celery beat tasks + + Web workers will set the "dirty_time" value for a given dirtybit slug. + Celery workers will do work if "processed_time" < "dirty_time" and update + "processed_time". 
+ """ + + class Slugs(models.TextChoices): + RFCINDEX = "rfcindex", "RFC Index" + + # next line can become `...choices=Slugs)` when we get to Django 5.x + slug = models.CharField(max_length=40, blank=False, choices=Slugs.choices, unique=True) + dirty_time = models.DateTimeField(null=True, blank=True) + processed_time = models.DateTimeField(null=True, blank=True) + + class Meta: + verbose_name_plural = "dirty bits" + + class DumpInfo(models.Model): date = models.DateTimeField() host = models.CharField(max_length=128) tz = models.CharField(max_length=32, default='UTC') - + class ForeignKey(models.ForeignKey): "A local ForeignKey proxy which provides the on_delete value required under Django 2.0." def __init__(self, to, on_delete=models.CASCADE, **kwargs): diff --git a/ietf/utils/resources.py b/ietf/utils/resources.py index 1252cfef14..63206eb33a 100644 --- a/ietf/utils/resources.py +++ b/ietf/utils/resources.py @@ -1,6 +1,4 @@ -# Copyright The IETF Trust 2014-2019, All Rights Reserved -# -*- coding: utf-8 -*- -# Autogenerated by the mkresources management command 2014-11-13 05:39 +# Copyright The IETF Trust 2014-2026, All Rights Reserved from ietf.api import ModelResource @@ -12,7 +10,7 @@ from django.contrib.contenttypes.models import ContentType from ietf import api -from ietf.utils.models import DumpInfo +from ietf.utils.models import DirtyBits, DumpInfo class UserResource(ModelResource): @@ -43,3 +41,9 @@ class Meta: "host": ALL, } api.utils.register(DumpInfoResource()) + + +class DirtyBitsResource(ModelResource): + class Meta: + queryset = DirtyBits.objects.none() +api.utils.register(DirtyBitsResource()) From 98d6bb51481fbba98aefafc4fa3c116e9fc9f57c Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 12:40:03 -0300 Subject: [PATCH 08/27] feat: utils to sync errata tags from errata.json --- ietf/sync/errata.py | 108 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 ietf/sync/errata.py diff --git 
a/ietf/sync/errata.py b/ietf/sync/errata.py new file mode 100644 index 0000000000..ca826687ec --- /dev/null +++ b/ietf/sync/errata.py @@ -0,0 +1,108 @@ +# Copyright The IETF Trust 2026, All Rights Reserved +import datetime +import json +from collections import defaultdict +from typing import DefaultDict, Literal + +from django.core.files.storage import storages + +from ietf.doc.models import Document, DocEvent +from ietf.name.models import DocTagName +from ietf.person.models import Person + +ERRATA_BLOB_NAME = "other/errata.json" # name of errata.json in the red bucket + +def get_errata_last_updated() -> datetime.datetime: + """Get timestamp of the last errata.json update + + May raise FileNotFoundError or other storage/S3 exceptions. Be prepared. + """ + red_bucket = storages["red_bucket"] + return red_bucket.get_modified_time(ERRATA_BLOB_NAME) + + +def get_errata_data(): + red_bucket = storages["red_bucket"] + with red_bucket.open(ERRATA_BLOB_NAME, "r") as f: + errata_data = json.load(f) + return errata_data + + +def errata_map_from_json(errata_data): + """Create a dict mapping RFC number to a list of applicable errata records""" + errata = defaultdict(list) + for item in errata_data: + doc_id = item["doc-id"] + if doc_id.upper().startswith("RFC"): + rfc_number = int(doc_id[3:]) + errata[rfc_number].append(item) + return dict(errata) + + +def update_errata_tags(errata_data): + tag_has_errata = DocTagName.objects.get(slug="errata") + tag_has_verified_errata = DocTagName.objects.get(slug="verified-errata") + system = Person.objects.get(name="(System)") + + errata_map = errata_map_from_json(errata_data) + nums_with_errata = [ + num + for num, errata in errata_map.items() + if any(er["errata_status_code"] != "Rejected" for er in errata) + ] + nums_with_verified_errata = [ + num + for num, errata in errata_map.items() + if any(er["errata_status_code"] == "Verified" for er in errata) + ] + + rfcs_gaining_errata_tag = Document.objects.filter( + type_id="rfc", 
rfc_number__in=nums_with_errata + ).exclude(tags=tag_has_errata) + + rfcs_gaining_verified_errata_tag = Document.objects.filter( + type_id="rfc", rfc_number__in=nums_with_verified_errata + ).exclude(tags=tag_has_verified_errata) + + rfcs_losing_errata_tag = Document.objects.filter( + type_id="rfc", tags=tag_has_errata + ).exclude(rfc_number__in=nums_with_errata) + + rfcs_losing_verified_errata_tag = Document.objects.filter( + type_id="rfc", tags=tag_has_verified_errata + ).exclude(rfc_number__in=nums_with_verified_errata) + + # map rfc_number to add/remove lists + changes: DefaultDict[Document, dict[str, list[DocTagName]]] = defaultdict( + lambda: {"add": [], "remove": []} + ) + for rfc in rfcs_gaining_errata_tag: + changes[rfc]["add"].append(tag_has_errata) + for rfc in rfcs_gaining_verified_errata_tag: + changes[rfc]["add"].append(tag_has_verified_errata) + for rfc in rfcs_losing_errata_tag: + changes[rfc]["remove"].append(tag_has_errata) + for rfc in rfcs_losing_verified_errata_tag: + changes[rfc]["remove"].append(tag_has_verified_errata) + + for rfc, changeset in changes.items(): + change_descs = [] + for tag in changeset["add"]: + rfc.tags.add(tag) + change_descs.append(f"added {tag.slug} tag") + for tag in changeset["remove"]: + rfc.tags.remove(tag) + change_descs.append(f"removed {tag.slug} tag") + summary = "Update from RFC Editor: " + ", ".join(change_descs) + if all( + er["errata_status_code"] == "Rejected" + for er in errata_map[rfc.rfc_number] + ): + summary += " (all errata rejected)" + DocEvent.objects.create( + doc=rfc, + rev=rfc.rev, # expect no rev + by=system, + type="sync_from_rfc_editor", + desc=summary + ) From c01380714f0c7d2b0e44dc5a5c9a8f11d3c73cf6 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 13:56:13 -0300 Subject: [PATCH 09/27] feat: DirtyBits for errata processing --- ietf/sync/errata.py | 57 +++++++++++++++++++ .../migrations/0004_alter_dirtybits_slug.py | 21 +++++++ ietf/utils/models.py | 1 + 3 files changed, 
79 insertions(+) create mode 100644 ietf/utils/migrations/0004_alter_dirtybits_slug.py diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index ca826687ec..1e8e122eae 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -5,10 +5,13 @@ from typing import DefaultDict, Literal from django.core.files.storage import storages +from django.db.models import Q from ietf.doc.models import Document, DocEvent from ietf.name.models import DocTagName from ietf.person.models import Person +from ietf.utils.log import log +from ietf.utils.models import DirtyBits ERRATA_BLOB_NAME = "other/errata.json" # name of errata.json in the red bucket @@ -106,3 +109,57 @@ def update_errata_tags(errata_data): type="sync_from_rfc_editor", desc=summary ) + + +## DirtyBits management for the errata tags + +ERRATA_SLUG = DirtyBits.Slugs.ERRATA + + +def update_errata_dirty_time() -> DirtyBits | None: + try: + last_update = get_errata_last_updated() + except Exception as err: + log(f"Error in get_errata_last_updated: {err}") + return None + else: + dirty_work, created = DirtyBits.objects.update_or_create( + slug=ERRATA_SLUG, defaults={"dirty_time": last_update} + ) + if created: + log(f"Created DirtyBits(slug='{ERRATA_SLUG}')") + return dirty_work + +def mark_errata_as_processed(when: datetime.datetime): + n_updated = DirtyBits.objects.filter( + Q(processed_time__isnull=True) | Q(processed_time__lt=when), + slug=ERRATA_SLUG, + ).update(processed_time=when) + if n_updated > 0: + log(f"processed_time is now {when.isoformat()}") + else: + log("processed_time not updated, no matching record found") + + +def errata_are_dirty(): + """Do the errata tags need to be updated?""" + dirty_work = update_errata_dirty_time() # creates DirtyBits if needed + if dirty_work is None: + # A None indicates we could not check the timestamp of errata.json. In that + # case, we are not likely to be able to read the blob either, so don't try + # to process it. An error was already logged. 
+ return False + display_processed_time = ( + dirty_work.processed_time.isoformat() + if dirty_work.processed_time is not None + else "never" + ) + log( + f"DirtyBits(slug='{ERRATA_SLUG}'): " + f"dirty_time={dirty_work.dirty_time.isoformat()} " + f"processed_time={display_processed_time}" + ) + return ( + dirty_work.processed_time is None + or dirty_work.dirty_time >= dirty_work.processed_time + ) diff --git a/ietf/utils/migrations/0004_alter_dirtybits_slug.py b/ietf/utils/migrations/0004_alter_dirtybits_slug.py new file mode 100644 index 0000000000..e17ea6cadd --- /dev/null +++ b/ietf/utils/migrations/0004_alter_dirtybits_slug.py @@ -0,0 +1,21 @@ +# Copyright The IETF Trust 2026, All Rights Reserved + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("utils", "0003_dirtybits"), + ] + + operations = [ + migrations.AlterField( + model_name="dirtybits", + name="slug", + field=models.CharField( + choices=[("rfcindex", "RFC Index"), ("errata", "Errata Tags")], + max_length=40, + unique=True, + ), + ), + ] diff --git a/ietf/utils/models.py b/ietf/utils/models.py index 13afbdfe20..64f7f253f2 100644 --- a/ietf/utils/models.py +++ b/ietf/utils/models.py @@ -15,6 +15,7 @@ class DirtyBits(models.Model): class Slugs(models.TextChoices): RFCINDEX = "rfcindex", "RFC Index" + ERRATA = "errata", "Errata Tags" # next line can become `...choices=Slugs)` when we get to Django 5.x slug = models.CharField(max_length=40, blank=False, choices=Slugs.choices, unique=True) From 070c0595c58788b159c113004d749da962cf8449 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 14:03:21 -0300 Subject: [PATCH 10/27] feat: errata update task --- ietf/sync/tasks.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 2805f431bf..357075620f 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -17,6 +17,8 @@ from ietf.doc.tasks import 
rebuild_reference_relations_task from ietf.sync import iana from ietf.sync import rfceditor +from ietf.sync.errata import errata_are_dirty, mark_errata_as_processed, get_errata_data, \ + update_errata_tags from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue from ietf.sync.rfcindex import ( create_bcp_txt_index, @@ -288,6 +290,17 @@ def load_rfcs_into_blobdb_task(start: int, end: int): load_rfcs_into_blobdb(list(range(start, end + 1))) +@shared_task +def update_errata_from_rfceditor_task(): + if errata_are_dirty(): + # new_processed_time is the *start* of processing so that any changes after + # this point will trigger another refresh + new_processed_time = timezone.now() + errata_data = get_errata_data() + update_errata_tags(errata_data) + mark_errata_as_processed(new_processed_time) + + @shared_task def refresh_rfc_index_task(): if rfcindex_is_dirty(): From 9f1e5ceb1d90439653920c7080e6e1fb924e762f Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 14:20:10 -0300 Subject: [PATCH 11/27] feat: rebuild rfc indexes after errata update --- ietf/sync/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 357075620f..ac0fe173ca 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -26,7 +26,7 @@ create_rfc_txt_index, create_rfc_xml_index, create_std_txt_index, - rfcindex_is_dirty, mark_rfcindex_as_processed, + rfcindex_is_dirty, mark_rfcindex_as_processed, mark_rfcindex_as_dirty, ) from ietf.sync.utils import build_from_file_content, load_rfcs_into_blobdb, rsync_helper from ietf.utils import log @@ -299,6 +299,7 @@ def update_errata_from_rfceditor_task(): errata_data = get_errata_data() update_errata_tags(errata_data) mark_errata_as_processed(new_processed_time) + mark_rfcindex_as_dirty() # ensure any changes are reflected in the indexes @shared_task From 4dd9f2a82569b85b6d2e74e418a43b7382edb8af Mon Sep 17 00:00:00 2001 From: Jennifer 
Richards Date: Tue, 14 Apr 2026 14:32:32 -0300 Subject: [PATCH 12/27] chore: setting for ERRATA_JSON_BLOB_NAME --- ietf/sync/errata.py | 27 +++++++++++++++++---------- k8s/settings_local.py | 5 +++++ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 1e8e122eae..0d154bf86e 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -4,6 +4,7 @@ from collections import defaultdict from typing import DefaultDict, Literal +from django.conf import settings from django.core.files.storage import storages from django.db.models import Q @@ -13,20 +14,26 @@ from ietf.utils.log import log from ietf.utils.models import DirtyBits -ERRATA_BLOB_NAME = "other/errata.json" # name of errata.json in the red bucket + +DEFAULT_ERRATA_JSON_BLOB_NAME = "other/errata.json" + def get_errata_last_updated() -> datetime.datetime: """Get timestamp of the last errata.json update - + May raise FileNotFoundError or other storage/S3 exceptions. Be prepared. 
""" red_bucket = storages["red_bucket"] - return red_bucket.get_modified_time(ERRATA_BLOB_NAME) + return red_bucket.get_modified_time( + getattr(settings, "ERRATA_JSON_BLOB_NAME", DEFAULT_ERRATA_JSON_BLOB_NAME) + ) def get_errata_data(): red_bucket = storages["red_bucket"] - with red_bucket.open(ERRATA_BLOB_NAME, "r") as f: + with red_bucket.open( + getattr(settings, "ERRATA_JSON_BLOB_NAME", DEFAULT_ERRATA_JSON_BLOB_NAME), "r" + ) as f: errata_data = json.load(f) return errata_data @@ -78,10 +85,10 @@ def update_errata_tags(errata_data): # map rfc_number to add/remove lists changes: DefaultDict[Document, dict[str, list[DocTagName]]] = defaultdict( lambda: {"add": [], "remove": []} - ) + ) for rfc in rfcs_gaining_errata_tag: changes[rfc]["add"].append(tag_has_errata) - for rfc in rfcs_gaining_verified_errata_tag: + for rfc in rfcs_gaining_verified_errata_tag: changes[rfc]["add"].append(tag_has_verified_errata) for rfc in rfcs_losing_errata_tag: changes[rfc]["remove"].append(tag_has_errata) @@ -98,8 +105,7 @@ def update_errata_tags(errata_data): change_descs.append(f"removed {tag.slug} tag") summary = "Update from RFC Editor: " + ", ".join(change_descs) if all( - er["errata_status_code"] == "Rejected" - for er in errata_map[rfc.rfc_number] + er["errata_status_code"] == "Rejected" for er in errata_map[rfc.rfc_number] ): summary += " (all errata rejected)" DocEvent.objects.create( @@ -107,7 +113,7 @@ def update_errata_tags(errata_data): rev=rfc.rev, # expect no rev by=system, type="sync_from_rfc_editor", - desc=summary + desc=summary, ) @@ -130,6 +136,7 @@ def update_errata_dirty_time() -> DirtyBits | None: log(f"Created DirtyBits(slug='{ERRATA_SLUG}')") return dirty_work + def mark_errata_as_processed(when: datetime.datetime): n_updated = DirtyBits.objects.filter( Q(processed_time__isnull=True) | Q(processed_time__lt=when), @@ -148,7 +155,7 @@ def errata_are_dirty(): # A None indicates we could not check the timestamp of errata.json. 
In that # case, we are not likely to be able to read the blob either, so don't try # to process it. An error was already logged. - return False + return False display_processed_time = ( dirty_work.processed_time.isoformat() if dirty_work.processed_time is not None diff --git a/k8s/settings_local.py b/k8s/settings_local.py index 3a7e524f5a..251f11234f 100644 --- a/k8s/settings_local.py +++ b/k8s/settings_local.py @@ -511,3 +511,8 @@ def _multiline_to_list(s): "DATATRACKER_ERRATA_METADATA_NOTIFICATION_URL must be set if " "DATATRACKER_ERRATA_METADATA_NOTIFICATION_API_KEY is provided" ) + +# name (with path) of errata.json in the red bucket +ERRATA_JSON_BLOB_NAME = os.environ.get( + "DATATRACKER_ERRATA_JSON_BLOB_NAME", "other/errata.json" +) From 1b2aced58e6fa43ff5c9579964ac4684fb87824f Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 15:00:35 -0300 Subject: [PATCH 13/27] fix: transaction --- ietf/sync/errata.py | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 0d154bf86e..51531a6190 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -6,6 +6,7 @@ from django.conf import settings from django.core.files.storage import storages +from django.db import transaction from django.db.models import Q from ietf.doc.models import Document, DocEvent @@ -96,25 +97,29 @@ def update_errata_tags(errata_data): changes[rfc]["remove"].append(tag_has_verified_errata) for rfc, changeset in changes.items(): - change_descs = [] - for tag in changeset["add"]: - rfc.tags.add(tag) - change_descs.append(f"added {tag.slug} tag") - for tag in changeset["remove"]: - rfc.tags.remove(tag) - change_descs.append(f"removed {tag.slug} tag") - summary = "Update from RFC Editor: " + ", ".join(change_descs) - if all( - er["errata_status_code"] == "Rejected" for er in errata_map[rfc.rfc_number] - ): - summary += " (all errata rejected)" - DocEvent.objects.create( - 
doc=rfc, - rev=rfc.rev, # expect no rev - by=system, - type="sync_from_rfc_editor", - desc=summary, - ) + # Update in a transaction per RFC to keep tags and DocEvents consistent. + # With this in place, an interrupted task will be cleanly completed on the + # next run. + with transaction.atomic(): + change_descs = [] + for tag in changeset["add"]: + rfc.tags.add(tag) + change_descs.append(f"added {tag.slug} tag") + for tag in changeset["remove"]: + rfc.tags.remove(tag) + change_descs.append(f"removed {tag.slug} tag") + summary = "Update from RFC Editor: " + ", ".join(change_descs) + if all( + er["errata_status_code"] == "Rejected" for er in errata_map[rfc.rfc_number] + ): + summary += " (all errata rejected)" + DocEvent.objects.create( + doc=rfc, + rev=rfc.rev, # expect no rev + by=system, + type="sync_from_rfc_editor", + desc=summary, + ) ## DirtyBits management for the errata tags From 5bdbdf1ea961542f9aa36e9286222cea37598c41 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 15:14:13 -0300 Subject: [PATCH 14/27] refactor: extract method from task --- ietf/sync/errata.py | 5 +++++ ietf/sync/tasks.py | 10 ++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 51531a6190..81a31e7484 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -175,3 +175,8 @@ def errata_are_dirty(): dirty_work.processed_time is None or dirty_work.dirty_time >= dirty_work.processed_time ) + + +def update_errata_from_rfceditor(): + errata_data = get_errata_data() + update_errata_tags(errata_data) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index ac0fe173ca..34b2efeb5c 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -17,8 +17,11 @@ from ietf.doc.tasks import rebuild_reference_relations_task from ietf.sync import iana from ietf.sync import rfceditor -from ietf.sync.errata import errata_are_dirty, mark_errata_as_processed, get_errata_data, \ - update_errata_tags +from 
ietf.sync.errata import ( + errata_are_dirty, + mark_errata_as_processed, + update_errata_from_rfceditor, +) from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue from ietf.sync.rfcindex import ( create_bcp_txt_index, @@ -296,8 +299,7 @@ def update_errata_from_rfceditor_task(): # new_processed_time is the *start* of processing so that any changes after # this point will trigger another refresh new_processed_time = timezone.now() - errata_data = get_errata_data() - update_errata_tags(errata_data) + update_errata_from_rfceditor() mark_errata_as_processed(new_processed_time) mark_rfcindex_as_dirty() # ensure any changes are reflected in the indexes From 2c385a0f95a991ff547152dba2f700e7a7c8e3ec Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 15:15:32 -0300 Subject: [PATCH 15/27] test: test_update_errata_from_rfceditor_task --- ietf/sync/tests.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index 21d6cb5cd5..6cbd91d59f 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -1,5 +1,4 @@ -# Copyright The IETF Trust 2012-2020, All Rights Reserved -# -*- coding: utf-8 -*- +# Copyright The IETF Trust 2012-2026, All Rights Reserved import os @@ -33,6 +32,7 @@ from ietf.person.factories import PersonFactory from ietf.person.models import Person from ietf.sync import iana, rfceditor, tasks +from ietf.sync.tasks import update_errata_from_rfceditor_task from ietf.utils.mail import outbox, empty_outbox from ietf.utils.test_utils import login_testing_unauthorized from ietf.utils.test_utils import TestCase @@ -1215,3 +1215,28 @@ def test_load_rfcs_into_blobdb_task( self.assertEqual(mock_kwargs, {}) + @mock.patch("ietf.sync.tasks.update_errata_from_rfceditor") + @mock.patch("ietf.sync.tasks.mark_rfcindex_as_dirty") + @mock.patch("ietf.sync.tasks.mark_errata_as_processed") + @mock.patch("ietf.sync.tasks.errata_are_dirty") + def 
test_update_errata_from_rfceditor_task( + self, + mock_errata_are_dirty, + mock_mark_errata_processed, + mock_mark_rfcindex_dirty, + mock_update, + ): + mock_errata_are_dirty.return_value = False + update_errata_from_rfceditor_task() + self.assertTrue(mock_errata_are_dirty.called) + self.assertFalse(mock_mark_errata_processed.called) + self.assertFalse(mock_mark_rfcindex_dirty.called) + self.assertFalse(mock_update.called) + + mock_errata_are_dirty.reset_mock() + mock_errata_are_dirty.return_value = True + update_errata_from_rfceditor_task() + self.assertTrue(mock_errata_are_dirty.called) + self.assertTrue(mock_mark_errata_processed.called) + self.assertTrue(mock_mark_rfcindex_dirty.called) + self.assertTrue(mock_update.called) From 3a4e248570e7373e0de8c2281af2df6d485919e1 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 15:24:12 -0300 Subject: [PATCH 16/27] refactor: slight reorg in errata.py --- ietf/sync/errata.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 81a31e7484..8a4c13c3a9 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -122,6 +122,11 @@ def update_errata_tags(errata_data): ) +def update_errata_from_rfceditor(): + errata_data = get_errata_data() + update_errata_tags(errata_data) + + ## DirtyBits management for the errata tags ERRATA_SLUG = DirtyBits.Slugs.ERRATA @@ -175,8 +180,3 @@ def errata_are_dirty(): dirty_work.processed_time is None or dirty_work.dirty_time >= dirty_work.processed_time ) - - -def update_errata_from_rfceditor(): - errata_data = get_errata_data() - update_errata_tags(errata_data) From a568046bbed4656005a64ce97e8f1e58b46ede9b Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 19:43:01 -0300 Subject: [PATCH 17/27] test: test errata module --- ietf/sync/tests.py | 193 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 192 insertions(+), 1 deletion(-) diff --git a/ietf/sync/tests.py 
b/ietf/sync/tests.py index 6cbd91d59f..eb70bcf9e9 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -12,6 +12,8 @@ from dataclasses import dataclass from django.conf import settings +from django.core.files.base import ContentFile +from django.core.files.storage import storages from django.urls import reverse as urlreverse from django.utils import timezone from django.test.utils import override_settings @@ -24,7 +26,7 @@ RfcFactory, DocumentAuthorFactory, DocEventFactory, - BcpFactory, + BcpFactory, WgRfcFactory, ) from ietf.doc.models import Document, DocEvent, DeletedEvent, DocTagName, RelatedDocument, State, StateDocEvent from ietf.doc.utils import add_state_change_event @@ -32,8 +34,12 @@ from ietf.person.factories import PersonFactory from ietf.person.models import Person from ietf.sync import iana, rfceditor, tasks +from ietf.sync.errata import update_errata_from_rfceditor, get_errata_last_updated, \ + get_errata_data, errata_map_from_json, update_errata_dirty_time, \ + mark_errata_as_processed, update_errata_tags from ietf.sync.tasks import update_errata_from_rfceditor_task from ietf.utils.mail import outbox, empty_outbox +from ietf.utils.models import DirtyBits from ietf.utils.test_utils import login_testing_unauthorized from ietf.utils.test_utils import TestCase from ietf.utils.timezone import date_today, RPC_TZINFO @@ -882,6 +888,191 @@ def test_rfceditor_undo(self): self.assertTrue(StateDocEvent.objects.filter(desc="First", doc=draft)) +class ErrataTests(TestCase): + @override_settings(ERRATA_JSON_BLOB_NAME="myblob.json") + def test_get_errata_last_update(self): + red_bucket = storages["red_bucket"] # InMemoryStorage in test + red_bucket.save("myblob.json", ContentFile("file")) + self.assertEqual( + get_errata_last_updated(), red_bucket.get_modified_time("myblob.json") + ) + + @override_settings(ERRATA_JSON_BLOB_NAME="myblob.json") + def test_get_errata_data(self): + red_bucket = storages["red_bucket"] # InMemoryStorage in test + 
red_bucket.save("myblob.json", ContentFile('[{"value": 3}]')) + self.assertEqual( + get_errata_data(), + [{"value": 3}], + ) + + def test_errata_map_from_json(self): + input_data = [ + { + "doc-id": "not-an-rfc", + "errata_status_code": "Verified", + }, + { + "doc-id": "rfc01234", + "errata_status_code": "Reported", + }, + { + "doc-id": "RFC1001", + "errata_status_code": "Verified" + }, + { + "doc-id": "RfC1234", + "errata_status_code": "Verified", + }, + ] + expected_output = {1001: [input_data[2]], 1234: [input_data[1], input_data[3]]} + self.assertDictEqual(errata_map_from_json(input_data), expected_output) + + @mock.patch("ietf.sync.errata.update_errata_tags") + @mock.patch("ietf.sync.errata.get_errata_data") + def test_update_errata_from_rfceditor(self, mock_get_data, mock_update): + fake_data = object() + mock_get_data.return_value = fake_data + update_errata_from_rfceditor() + self.assertTrue(mock_get_data.called) + self.assertTrue(mock_update.called) + self.assertEqual(mock_update.call_args, mock.call(fake_data)) + + def test_update_errata_tags(self): + tag_has_errata = DocTagName.objects.get(slug="errata") + tag_has_verified_errata = DocTagName.objects.get(slug="verified-errata") + + rfcs = WgRfcFactory.create_batch(10) + rfcs[0].tags.set([tag_has_errata]) + rfcs[1].tags.set([tag_has_errata, tag_has_verified_errata]) + rfcs[2].tags.set([tag_has_errata]) + rfcs[3].tags.set([tag_has_errata, tag_has_verified_errata]) + rfcs[4].tags.set([tag_has_errata]) + rfcs[5].tags.set([tag_has_errata, tag_has_verified_errata]) + + # Only contains the fields we care about, not the full JSON + errata_data = [ + # rfcs[0] had errata and should keep it + {"doc-id": rfcs[0].name, "errata_status_code": "Held for Document Update"}, + {"doc-id": rfcs[0].name, "errata_status_code": "Rejected"}, + # rfcs[1] had errata+verified-errata and should keep both + {"doc-id": rfcs[1].name, "errata_status_code": "Verified"}, + # rfcs[2] had errata and should gain verified-errata + {"doc-id": 
rfcs[2].name, "errata_status_code": "Verified"}, + # rfcs[3] had errata+verified errata and should lose both + {"doc-id": rfcs[3].name, "errata_status_code": "Rejected"}, + # rfcs[4] had errata and should gain verified-errata + {"doc-id": rfcs[4].name, "errata_status_code": "Verified"}, + {"doc-id": rfcs[4].name, "errata_status_code": "Reported"}, + # rfcs[5] had errata+verified-errata and should lose verified-errata + {"doc-id": rfcs[5].name, "errata_status_code": "Reported"}, + # rfcs[6] had none and should gain errata + {"doc-id": rfcs[6].name, "errata_status_code": "Reported"}, + # rfcs[7] had none and should gain errata+verified-errata + {"doc-id": rfcs[7].name, "errata_status_code": "Verified"}, + # rfcs[8] had none and it should stay that way + {"doc-id": rfcs[8].name, "errata_status_code": "Rejected"}, + # rfcs[9] had none and it should stay that way (no entry at all) + ] + update_errata_tags(errata_data) + + self.assertCountEqual(rfcs[0].tags.all(), [tag_has_errata]) + self.assertIsNone(rfcs[0].docevent_set.first()) # no change + + self.assertCountEqual( + rfcs[1].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertIsNone(rfcs[1].docevent_set.first()) # no change + + self.assertCountEqual( + rfcs[2].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertEqual(rfcs[2].docevent_set.count(), 1) + self.assertIn(": added verified-errata tag", rfcs[2].docevent_set.first().desc) + + self.assertCountEqual(rfcs[3].tags.all(), []) + self.assertEqual(rfcs[3].docevent_set.count(), 1) + self.assertIn( + ": removed errata tag, removed verified-errata tag (all errata rejected)", + rfcs[3].docevent_set.first().desc, + ) + + self.assertCountEqual( + rfcs[4].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertEqual(rfcs[4].docevent_set.count(), 1) + self.assertIn(": added verified-errata tag", rfcs[4].docevent_set.first().desc) + + self.assertCountEqual(rfcs[5].tags.all(), [tag_has_errata]) + 
self.assertEqual(rfcs[5].docevent_set.count(), 1) + self.assertIn( + ": removed verified-errata tag", rfcs[5].docevent_set.first().desc + ) + + self.assertCountEqual(rfcs[6].tags.all(), [tag_has_errata]) + self.assertEqual(rfcs[6].docevent_set.count(), 1) + self.assertIn(": added errata tag", rfcs[6].docevent_set.first().desc) + + self.assertCountEqual( + rfcs[7].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertEqual(rfcs[7].docevent_set.count(), 1) + self.assertIn( + ": added errata tag, added verified-errata tag", + rfcs[7].docevent_set.first().desc, + ) + + self.assertCountEqual(rfcs[8].tags.all(), []) + self.assertIsNone(rfcs[8].docevent_set.first()) # no change + + self.assertCountEqual(rfcs[9].tags.all(), []) + self.assertIsNone(rfcs[9].docevent_set.first()) # no change + + @override_settings(ERRATA_JSON_BLOB_NAME="myblob.json") + @mock.patch("ietf.sync.errata.get_errata_last_updated") + def test_update_errata_dirty_time(self, mock_last_updated): + ERRATA_SLUG = DirtyBits.Slugs.ERRATA + + # No time available + mock_last_updated.side_effect = FileNotFoundError + self.assertIsNone(DirtyBits.objects.filter(slug=ERRATA_SLUG).first()) + self.assertIsNone(update_errata_dirty_time()) # no blob yet + self.assertIsNone(DirtyBits.objects.filter(slug=ERRATA_SLUG).first()) + + # Now set a time + first_timestamp = timezone.now() - datetime.timedelta(hours=3) + mock_last_updated.return_value = first_timestamp + mock_last_updated.side_effect = None + result = update_errata_dirty_time() + self.assertTrue(isinstance(result, DirtyBits)) + result.refresh_from_db() + self.assertEqual(result.slug, ERRATA_SLUG) + self.assertEqual(result.processed_time, None) + self.assertEqual(result.dirty_time, first_timestamp) + + # Update the time + second_timestamp = timezone.now() + mock_last_updated.return_value = second_timestamp + second_result = update_errata_dirty_time() + self.assertEqual(result.pk, second_result.pk) # should be the same record + 
result.refresh_from_db() + self.assertEqual(result.slug, ERRATA_SLUG) + self.assertEqual(result.processed_time, None) + self.assertEqual(result.dirty_time, second_timestamp) + + def test_mark_errata_as_processed(self): + ERRATA_SLUG = DirtyBits.Slugs.ERRATA + first_timestamp = timezone.now() + mark_errata_as_processed(first_timestamp) # no DirtyBits is not an error + self.assertIsNone(DirtyBits.objects.filter(slug=ERRATA_SLUG).first()) + dbits = DirtyBits.objects.create(slug=ERRATA_SLUG, dirty_time=first_timestamp) + second_timestamp = timezone.now() + mark_errata_as_processed(second_timestamp) + dbits.refresh_from_db() + self.assertEqual(dbits.dirty_time, first_timestamp) + self.assertEqual(dbits.processed_time, second_timestamp) + + class TaskTests(TestCase): @override_settings( RFC_EDITOR_INDEX_URL="https://rfc-editor.example.com/index/", From 96e227b89b349e67c65e6f7149577974ed3021dd Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 19:43:17 -0300 Subject: [PATCH 18/27] fix: guard map lookup --- ietf/sync/errata.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 8a4c13c3a9..9f71cf111b 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -109,8 +109,9 @@ def update_errata_tags(errata_data): rfc.tags.remove(tag) change_descs.append(f"removed {tag.slug} tag") summary = "Update from RFC Editor: " + ", ".join(change_descs) - if all( - er["errata_status_code"] == "Rejected" for er in errata_map[rfc.rfc_number] + if rfc.rfc_number in errata_map and all( + er["errata_status_code"] == "Rejected" + for er in errata_map[rfc.rfc_number] ): summary += " (all errata rejected)" DocEvent.objects.create( From a34917937cafd0637a22e60e06e1dbcad72dcc53 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Apr 2026 20:04:06 -0300 Subject: [PATCH 19/27] fix: lint / typing --- ietf/sync/errata.py | 9 +++++---- ietf/sync/tests.py | 22 ++++++++++++++++++---- 2 files changed, 23 
insertions(+), 8 deletions(-) diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py index 9f71cf111b..113d987291 100644 --- a/ietf/sync/errata.py +++ b/ietf/sync/errata.py @@ -2,7 +2,7 @@ import datetime import json from collections import defaultdict -from typing import DefaultDict, Literal +from typing import DefaultDict from django.conf import settings from django.core.files.storage import storages @@ -18,6 +18,7 @@ DEFAULT_ERRATA_JSON_BLOB_NAME = "other/errata.json" +type ErrataJsonEntry = dict[str, str] def get_errata_last_updated() -> datetime.datetime: """Get timestamp of the last errata.json update @@ -30,7 +31,7 @@ def get_errata_last_updated() -> datetime.datetime: ) -def get_errata_data(): +def get_errata_data() -> list[ErrataJsonEntry]: red_bucket = storages["red_bucket"] with red_bucket.open( getattr(settings, "ERRATA_JSON_BLOB_NAME", DEFAULT_ERRATA_JSON_BLOB_NAME), "r" @@ -39,7 +40,7 @@ def get_errata_data(): return errata_data -def errata_map_from_json(errata_data): +def errata_map_from_json(errata_data: list[ErrataJsonEntry]): """Create a dict mapping RFC number to a list of applicable errata records""" errata = defaultdict(list) for item in errata_data: @@ -50,7 +51,7 @@ def errata_map_from_json(errata_data): return dict(errata) -def update_errata_tags(errata_data): +def update_errata_tags(errata_data: list[ErrataJsonEntry]): tag_has_errata = DocTagName.objects.get(slug="errata") tag_has_verified_errata = DocTagName.objects.get(slug="verified-errata") system = Person.objects.get(name="(System)") diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index eb70bcf9e9..e83b6a5e0a 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -28,15 +28,29 @@ DocEventFactory, BcpFactory, WgRfcFactory, ) -from ietf.doc.models import Document, DocEvent, DeletedEvent, DocTagName, RelatedDocument, State, StateDocEvent +from ietf.doc.models import ( + Document, + DocEvent, + DeletedEvent, + DocTagName, + RelatedDocument, + State, + StateDocEvent, +) from 
ietf.doc.utils import add_state_change_event from ietf.group.factories import GroupFactory from ietf.person.factories import PersonFactory from ietf.person.models import Person from ietf.sync import iana, rfceditor, tasks -from ietf.sync.errata import update_errata_from_rfceditor, get_errata_last_updated, \ - get_errata_data, errata_map_from_json, update_errata_dirty_time, \ - mark_errata_as_processed, update_errata_tags +from ietf.sync.errata import ( + update_errata_from_rfceditor, + get_errata_last_updated, + get_errata_data, + errata_map_from_json, + update_errata_dirty_time, + mark_errata_as_processed, + update_errata_tags, +) from ietf.sync.tasks import update_errata_from_rfceditor_task from ietf.utils.mail import outbox, empty_outbox from ietf.utils.models import DirtyBits From 15b2abd937f5d6e2b71667501309a965fdd9dc9d Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Thu, 16 Apr 2026 12:12:58 -0500 Subject: [PATCH 20/27] feat: serve notprepped rfc xml from datatracker (#10692) * chore: move v3 RFC boundary number to settings * feat: button for downloading unprepped RFCXML * chore: ruff * chore: refactor test to avoid setup overhead * feat: serve notprepped bytes from blobdb * fix: typo * chore; improve test class name * feat: wrapper explaining notprepped xml --- ietf/doc/feeds.py | 3 +- ietf/doc/tests_unprepped.py | 118 +++++++++++++++++++++ ietf/doc/urls.py | 2 + ietf/doc/views_doc.py | 31 +++++- ietf/settings.py | 4 +- ietf/sync/rfcindex.py | 2 +- ietf/templates/doc/document_rfc.html | 9 ++ ietf/templates/doc/notprepped_wrapper.html | 26 +++++ 8 files changed, 190 insertions(+), 5 deletions(-) create mode 100644 ietf/doc/tests_unprepped.py create mode 100644 ietf/templates/doc/notprepped_wrapper.html diff --git a/ietf/doc/feeds.py b/ietf/doc/feeds.py index afe96cf0df..0269906fcf 100644 --- a/ietf/doc/feeds.py +++ b/ietf/doc/feeds.py @@ -5,6 +5,7 @@ import datetime import unicodedata +from django.conf import settings from django.contrib.syndication.views 
import Feed, FeedDoesNotExist from django.utils.feedgenerator import Atom1Feed, Rss201rev2Feed from django.urls import reverse as urlreverse @@ -223,7 +224,7 @@ def item_extra_kwargs(self, item): extra.update({"dcterms_accessRights": "gratis"}) extra.update({"dcterms_format": "text/html"}) media_contents = [] - if item.rfc_number < 8650: + if item.rfc_number < settings.FIRST_V3_RFC: if item.rfc_number not in [8, 9, 51, 418, 500, 530, 589]: for fmt, media_type in [("txt", "text/plain"), ("html", "text/html")]: media_contents.append( diff --git a/ietf/doc/tests_unprepped.py b/ietf/doc/tests_unprepped.py new file mode 100644 index 0000000000..f88af8e81a --- /dev/null +++ b/ietf/doc/tests_unprepped.py @@ -0,0 +1,118 @@ +# Copyright The IETF Trust 2026, All Rights Reserved + +from django.conf import settings +from django.utils import timezone +from django.urls import reverse as urlreverse + +from pyquery import PyQuery + +from ietf.doc.factories import WgRfcFactory +from ietf.doc.models import StoredObject +from ietf.doc.storage_utils import store_bytes +from ietf.utils.test_utils import TestCase + + +class UnpreppedRfcXmlTests(TestCase): + def test_editor_source_button_visibility(self): + pre_v3 = WgRfcFactory(rfc_number=settings.FIRST_V3_RFC - 1) + first_v3 = WgRfcFactory(rfc_number=settings.FIRST_V3_RFC) + post_v3 = WgRfcFactory(rfc_number=settings.FIRST_V3_RFC + 1) + + for rfc, expect_button in [(pre_v3, False), (first_v3, True), (post_v3, True)]: + r = self.client.get( + urlreverse( + "ietf.doc.views_doc.document_main", kwargs=dict(name=rfc.name) + ) + ) + self.assertEqual(r.status_code, 200) + buttons = PyQuery(r.content)('a.btn:contains("Get editor source")') + if expect_button: + self.assertEqual(len(buttons), 1, msg=f"rfc_number={rfc.rfc_number}") + expected_href = urlreverse( + "ietf.doc.views_doc.rfcxml_notprepped_wrapper", + kwargs=dict(number=rfc.rfc_number), + ) + self.assertEqual( + buttons.attr("href"), + expected_href, + 
msg=f"rfc_number={rfc.rfc_number}", + ) + else: + self.assertEqual(len(buttons), 0, msg=f"rfc_number={rfc.rfc_number}") + + def test_rfcxml_notprepped(self): + number = settings.FIRST_V3_RFC + stored_name = f"notprepped/rfc{number}.notprepped.xml" + url = f"/doc/rfc{number}/notprepped/" + + # 404 for pre-v3 RFC numbers (no document needed) + r = self.client.get(f"/doc/rfc{number - 1}/notprepped/") + self.assertEqual(r.status_code, 404) + + # 404 when no RFC document exists in the database + r = self.client.get(url) + self.assertEqual(r.status_code, 404) + + # 404 when RFC document exists but has no StoredObject + WgRfcFactory(rfc_number=number) + r = self.client.get(url) + self.assertEqual(r.status_code, 404) + + # 404 when StoredObject exists but backing storage is missing (FileNotFoundError) + now = timezone.now() + StoredObject.objects.create( + store="rfc", + name=stored_name, + sha384="a" * 96, + len=0, + store_created=now, + created=now, + modified=now, + ) + r = self.client.get(url) + self.assertEqual(r.status_code, 404) + + # 200 with correct content-type and body when object is fully stored + xml_content = b"test" + store_bytes("rfc", stored_name, xml_content, allow_overwrite=True) + r = self.client.get(url) + self.assertEqual(r.status_code, 200) + self.assertEqual(r["Content-Type"], "application/xml") + self.assertEqual(r.content, xml_content) + + def test_rfcxml_notprepped_wrapper(self): + number = settings.FIRST_V3_RFC + + # 404 for pre-v3 RFC numbers (no document needed) + r = self.client.get( + urlreverse( + "ietf.doc.views_doc.rfcxml_notprepped_wrapper", + kwargs=dict(number=number - 1), + ) + ) + self.assertEqual(r.status_code, 404) + + # 404 when no RFC document exists in the database + r = self.client.get( + urlreverse( + "ietf.doc.views_doc.rfcxml_notprepped_wrapper", + kwargs=dict(number=number), + ) + ) + self.assertEqual(r.status_code, 404) + + # 200 with rendered template when RFC document exists + rfc = WgRfcFactory(rfc_number=number) + r = 
self.client.get( + urlreverse( + "ietf.doc.views_doc.rfcxml_notprepped_wrapper", + kwargs=dict(number=number), + ) + ) + self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + self.assertIn(str(rfc.rfc_number), q("h1").text()) + download_url = urlreverse( + "ietf.doc.views_doc.rfcxml_notprepped", kwargs=dict(number=number) + ) + self.assertEqual(len(q(f'a.btn[href="{download_url}"]')), 1) diff --git a/ietf/doc/urls.py b/ietf/doc/urls.py index 61e94b2231..0c13503b78 100644 --- a/ietf/doc/urls.py +++ b/ietf/doc/urls.py @@ -99,6 +99,8 @@ url(r'^%(name)s(?:/%(rev)s)?/$' % settings.URL_REGEXPS, views_doc.document_main), url(r'^%(name)s(?:/%(rev)s)?/bibtex/$' % settings.URL_REGEXPS, views_doc.document_bibtex), + url(r'^rfc(?P<number>[0-9]+)/notprepped/$' , views_doc.rfcxml_notprepped), + url(r'^rfc(?P<number>[0-9]+)/notprepped-wrapper/$', views_doc.rfcxml_notprepped_wrapper), url(r'^%(name)s(?:/%(rev)s)?/idnits2-state/$' % settings.URL_REGEXPS, views_doc.idnits2_state), url(r'^bibxml3/reference.I-D.%(name)s(?:-%(rev)s)?.xml$' % settings.URL_REGEXPS, views_doc.document_bibxml_ref), url(r'^bibxml3/%(name)s(?:-%(rev)s)?.xml$' % settings.URL_REGEXPS, views_doc.document_bibxml), diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py index c1f6352ac3..a23185333e 100644 --- a/ietf/doc/views_doc.py +++ b/ietf/doc/views_doc.py @@ -1,4 +1,4 @@ -# Copyright The IETF Trust 2009-2024, All Rights Reserved +# Copyright The IETF Trust 2009-2026, All Rights Reserved # -*- coding: utf-8 -*- # # Parts Copyright (C) 2009-2010 Nokia Corporation and/or its subsidiary(-ies).
@@ -57,7 +57,7 @@ import debug # pyflakes:ignore from ietf.doc.models import ( Document, DocHistory, DocEvent, BallotDocEvent, BallotType, - ConsensusDocEvent, NewRevisionDocEvent, TelechatDocEvent, WriteupDocEvent, IanaExpertDocEvent, + ConsensusDocEvent, NewRevisionDocEvent, StoredObject, TelechatDocEvent, WriteupDocEvent, IanaExpertDocEvent, IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor, RelatedDocument, RelatedDocHistory) from ietf.doc.tasks import investigate_fragment_task @@ -86,6 +86,7 @@ from ietf.review.models import ReviewAssignment from ietf.review.utils import can_request_review_of_doc, review_assignments_to_list_for_docs, review_requests_to_list_for_docs from ietf.review.utils import no_review_from_teams_on_doc +from ietf.doc.storage_utils import retrieve_bytes from ietf.utils import markup_txt, log, markdown from ietf.utils.draft import get_status_from_draft_text from ietf.utils.meetecho import MeetechoAPIError, SlidesManager @@ -2356,3 +2357,29 @@ def investigate(request): "results": results, }, ) + +def rfcxml_notprepped(request, number): + number = int(number) + if number < settings.FIRST_V3_RFC: + raise Http404 + rfc = Document.objects.filter(type="rfc", rfc_number=number).first() + if rfc is None: + raise Http404 + name = f"notprepped/rfc{number}.notprepped.xml" + if not StoredObject.objects.filter(name=name).exists(): + raise Http404 + try: + bytes = retrieve_bytes("rfc", name) + except FileNotFoundError: + raise Http404 + return HttpResponse(bytes, content_type="application/xml") + + +def rfcxml_notprepped_wrapper(request, number): + number = int(number) + if number < settings.FIRST_V3_RFC: + raise Http404 + rfc = Document.objects.filter(type="rfc", rfc_number=number).first() + if rfc is None: + raise Http404 + return render(request, "doc/notprepped_wrapper.html", context={"rfc": rfc}) diff --git a/ietf/settings.py b/ietf/settings.py index 40a4cb5c56..3aa45a453c 100644 --- a/ietf/settings.py +++ 
b/ietf/settings.py @@ -235,7 +235,9 @@ AUTHENTICATION_BACKENDS = ( 'ietf.ietfauth.backends.CaseInsensitiveModelBackend', ) -FILE_UPLOAD_PERMISSIONS = 0o644 +FILE_UPLOAD_PERMISSIONS = 0o644 + +FIRST_V3_RFC = 8650 # diff --git a/ietf/sync/rfcindex.py b/ietf/sync/rfcindex.py index 6864617874..d1a0ed432f 100644 --- a/ietf/sync/rfcindex.py +++ b/ietf/sync/rfcindex.py @@ -153,7 +153,7 @@ def get_publication_std_levels() -> dict[int, StdLevelName]: def format_ordering(rfc_number): - if rfc_number < 8650: + if rfc_number < settings.FIRST_V3_RFC: ordering = ["txt", "ps", "pdf", "html", "xml"] else: ordering = ["html", "txt", "ps", "pdf", "xml"] diff --git a/ietf/templates/doc/document_rfc.html b/ietf/templates/doc/document_rfc.html index 7612ef8910..d4b309a964 100644 --- a/ietf/templates/doc/document_rfc.html +++ b/ietf/templates/doc/document_rfc.html @@ -124,6 +124,15 @@ Referenced by + {% if doc.rfc_number >= settings.FIRST_V3_RFC %} + + + + Get editor source + + {% endif %} RFC {{ rfc.rfc_number }} — Not-prepped XML +

+ The not-prepped XML + is the RFC XML v3 source for an RFC at the moment in the publication process + just before the prep tool was used to expand default + values, generate section numbers, resolve cross-references, and embed + boilerplate. +

+ It is useful for authors who want to begin a new draft based on + the RFC's text, such as when creating a bis-draft, and for tools that process + author-facing RFC XML. +

+

+ + + Download not-prepped XML for RFC {{ rfc.rfc_number }} + +

+{% endblock %} From 20480d6242254693b3dbcbf9b73380b5b3c838cb Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Thu, 16 Apr 2026 13:59:23 -0500 Subject: [PATCH 21/27] fix: force notprepped downloads (#10719) --- ietf/doc/{tests_unprepped.py => tests_notprepped.py} | 10 +++++++--- ietf/doc/views_doc.py | 5 +++-- 2 files changed, 10 insertions(+), 5 deletions(-) rename ietf/doc/{tests_unprepped.py => tests_notprepped.py} (92%) diff --git a/ietf/doc/tests_unprepped.py b/ietf/doc/tests_notprepped.py similarity index 92% rename from ietf/doc/tests_unprepped.py rename to ietf/doc/tests_notprepped.py index f88af8e81a..f417aa7931 100644 --- a/ietf/doc/tests_unprepped.py +++ b/ietf/doc/tests_notprepped.py @@ -12,7 +12,7 @@ from ietf.utils.test_utils import TestCase -class UnpreppedRfcXmlTests(TestCase): +class NotpreppedRfcXmlTests(TestCase): def test_editor_source_button_visibility(self): pre_v3 = WgRfcFactory(rfc_number=settings.FIRST_V3_RFC - 1) first_v3 = WgRfcFactory(rfc_number=settings.FIRST_V3_RFC) @@ -72,13 +72,17 @@ def test_rfcxml_notprepped(self): r = self.client.get(url) self.assertEqual(r.status_code, 404) - # 200 with correct content-type and body when object is fully stored + # 200 with correct content-type, attachment disposition, and body when object is fully stored xml_content = b"test" store_bytes("rfc", stored_name, xml_content, allow_overwrite=True) r = self.client.get(url) self.assertEqual(r.status_code, 200) self.assertEqual(r["Content-Type"], "application/xml") - self.assertEqual(r.content, xml_content) + self.assertEqual( + r["Content-Disposition"], + f'attachment; filename="rfc{number}.notprepped.xml"', + ) + self.assertEqual(b"".join(r.streaming_content), xml_content) def test_rfcxml_notprepped_wrapper(self): number = settings.FIRST_V3_RFC diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py index a23185333e..5b57a62074 100644 --- a/ietf/doc/views_doc.py +++ b/ietf/doc/views_doc.py @@ -43,9 +43,10 @@ from celery.result import AsyncResult 
from django.core.cache import caches +from django.core.files.base import ContentFile from django.core.exceptions import PermissionDenied from django.db.models import Max -from django.http import HttpResponse, Http404, HttpResponseBadRequest, JsonResponse +from django.http import FileResponse, HttpResponse, Http404, HttpResponseBadRequest, JsonResponse from django.shortcuts import render, get_object_or_404, redirect from django.template.loader import render_to_string from django.urls import reverse as urlreverse @@ -2372,7 +2373,7 @@ def rfcxml_notprepped(request, number): bytes = retrieve_bytes("rfc", name) except FileNotFoundError: raise Http404 - return HttpResponse(bytes, content_type="application/xml") + return FileResponse(ContentFile(bytes, name=f"rfc{number}.notprepped.xml"), as_attachment=True) def rfcxml_notprepped_wrapper(request, number): From 9cecc36bc7e42ecc5cd196d96f4bd0eaf03b5e69 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Fri, 17 Apr 2026 00:25:02 -0300 Subject: [PATCH 22/27] feat: rebuild_searchindex task (#10723) * refactor: DRY * chore: typesense docker container (commented out) * feat: batched RFC search index import * feat: rebuild_searchindex task * feat: logging / error reporting * refactor: _task suffix for task name * test: tests for searchindex utils + tasks * fix: only create collection if dropped * fix: typing / lint --- docker-compose.yml | 12 ++ ietf/doc/tasks.py | 11 ++ ietf/doc/tests_tasks.py | 43 ++++++ ietf/utils/searchindex.py | 239 ++++++++++++++++++++++++++++++-- ietf/utils/tests_searchindex.py | 152 +++++++++++++++----- 5 files changed, 410 insertions(+), 47 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 4c3f2f6b8e..073d04b896 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -132,6 +132,18 @@ services: volumes: - blobdb-data:/var/lib/postgresql/data +# typesense: +# image: typesense/typesense:30.1 +# restart: on-failure +# ports: +# - "8108:8108" +# volumes: +# - 
./typesense-data:/data +# command: +# - '--data-dir=/data' +# - '--api-key=typesense-api-key' +# - '--enable-cors' + # Celery Beat is a periodic task runner. It is not normally needed for development, # but can be enabled by uncommenting the following. # diff --git a/ietf/doc/tasks.py b/ietf/doc/tasks.py index 19edb39014..273242e35f 100644 --- a/ietf/doc/tasks.py +++ b/ietf/doc/tasks.py @@ -209,3 +209,14 @@ def update_rfc_searchindex_task(self, rfc_number: int): countdown=searchindex_settings["TASK_RETRY_DELAY"], max_retries=searchindex_settings["TASK_MAX_RETRIES"], ) + + +@shared_task +def rebuild_searchindex_task(*, batchsize=40, drop_collection=False): + if drop_collection: + searchindex.delete_collection() + searchindex.create_collection() + searchindex.update_or_create_rfc_entries( + Document.objects.filter(type_id="rfc").order_by("-rfc_number"), + batchsize=batchsize, + ) diff --git a/ietf/doc/tests_tasks.py b/ietf/doc/tests_tasks.py index 728d21f131..2e2d65463f 100644 --- a/ietf/doc/tests_tasks.py +++ b/ietf/doc/tests_tasks.py @@ -24,6 +24,7 @@ generate_idnits2_rfc_status_task, investigate_fragment_task, notify_expirations_task, + rebuild_searchindex_task, update_rfc_searchindex_task, ) @@ -144,6 +145,48 @@ def test_update_rfc_searchindex_task( with self.assertRaises(Retry): update_rfc_searchindex_task(rfc_number=rfc.rfc_number) + @mock.patch("ietf.doc.tasks.searchindex.update_or_create_rfc_entries") + @mock.patch("ietf.doc.tasks.searchindex.create_collection") + @mock.patch("ietf.doc.tasks.searchindex.delete_collection") + def test_rebuild_searchindex_task(self, mock_delete, mock_create, mock_update): + rfcs = WgRfcFactory.create_batch(10) + rebuild_searchindex_task() + self.assertFalse(mock_delete.called) + self.assertFalse(mock_create.called) + self.assertTrue(mock_update.called) + self.assertQuerysetEqual( + mock_update.call_args.args[0], + sorted(rfcs, key=lambda doc: -doc.rfc_number), + ordered=True, + ) + + mock_delete.reset_mock() + 
mock_create.reset_mock() + mock_update.reset_mock() + rebuild_searchindex_task(drop_collection=True) + self.assertTrue(mock_delete.called) + self.assertTrue(mock_create.called) + self.assertTrue(mock_update.called) + self.assertQuerysetEqual( + mock_update.call_args.args[0], + sorted(rfcs, key=lambda doc: -doc.rfc_number), + ordered=True, + ) + + mock_delete.reset_mock() + mock_create.reset_mock() + mock_update.reset_mock() + rebuild_searchindex_task(drop_collection=True, batchsize=3) + self.assertTrue(mock_delete.called) + self.assertTrue(mock_create.called) + self.assertTrue(mock_update.called) + self.assertQuerysetEqual( + mock_update.call_args.args[0], + sorted(rfcs, key=lambda doc: -doc.rfc_number), + ordered=True, + ) + self.assertEqual(mock_update.call_args.kwargs["batchsize"], 3) + class Idnits2SupportTests(TestCase): settings_temp_path_overrides = TestCase.settings_temp_path_overrides + [ diff --git a/ietf/utils/searchindex.py b/ietf/utils/searchindex.py index e4427b88b5..a47e6d2f12 100644 --- a/ietf/utils/searchindex.py +++ b/ietf/utils/searchindex.py @@ -2,12 +2,15 @@ """Search indexing utilities""" import re +from itertools import batched from math import floor +from typing import Iterable import httpx # just for exceptions import typesense import typesense.exceptions from django.conf import settings +from typesense.types.document import DocumentSchema from ietf.doc.models import Document, StoredObject from ietf.doc.storage_utils import retrieve_str @@ -42,6 +45,24 @@ def enabled(): return _settings["TYPESENSE_API_URL"] != "" +def get_typesense_client() -> typesense.Client: + _settings = get_settings() + client = typesense.Client( + { + "api_key": _settings["TYPESENSE_API_KEY"], + "nodes": [_settings["TYPESENSE_API_URL"]], + } + ) + return client + + +def get_collection_name() -> str: + _settings = get_settings() + collection_name = _settings["TYPESENSE_COLLECTION_NAME"] + assert isinstance(collection_name, str) + return collection_name + + def 
_sanitize_text(content): """Sanitize content or abstract text for search""" # REs (with approximate names) @@ -62,7 +83,7 @@ def _sanitize_text(content): return content.strip() -def update_or_create_rfc_entry(rfc: Document): +def typesense_doc_from_rfc(rfc: Document) -> DocumentSchema: assert rfc.type_id == "rfc" assert rfc.rfc_number is not None @@ -75,8 +96,8 @@ def update_or_create_rfc_entry(rfc: Document): f"Indexing as {subseries[0].name}" ) subseries = subseries[0] if len(subseries) > 0 else None - obsoleted_by = rfc.relations_that("obs") - updated_by = rfc.relations_that("updates") + obsoleted_by = rfc.related_that("obs") + updated_by = rfc.related_that("updates") stored_txt = ( StoredObject.objects.exclude_deleted() @@ -91,8 +112,8 @@ def update_or_create_rfc_entry(rfc: Document): except Exception as err: log(f"Unable to retrieve {stored_txt} from storage: {err}") - ts_id = f"doc-{rfc.pk}" ts_document = { + "id": f"doc-{rfc.pk}", "rfcNumber": rfc.rfc_number, "rfc": str(rfc.rfc_number), "filename": rfc.name, @@ -143,13 +164,205 @@ def update_or_create_rfc_entry(rfc: Document): ts_document["adName"] = rfc.ad.name if content != "": ts_document["content"] = _sanitize_text(content) - _settings = get_settings() - client = typesense.Client( + return ts_document + + +def update_or_create_rfc_entry(rfc: Document): + """Update/create index entries for one RFC""" + ts_document = typesense_doc_from_rfc(rfc) + client = get_typesense_client() + client.collections[get_collection_name()].documents.upsert(ts_document) + + +def update_or_create_rfc_entries( + rfcs: Iterable[Document], batchsize: int | None = None +): + """Update/create index entries for RFCs in bulk + + If batchsize is set, computes index data in batches of batchsize and adds to the + index. Will make one API call per batch, i.e. ceil(len(rfcs) / batchsize) calls in total. + + N.b. that typesense has a server-side batch size that defaults to 40, which should + "almost never be changed from the default." This does not change that.
Further, + the python client library's import_ method has a batch_size parameter that does + client-side batching. We don't use that, either. + """ + success_count = 0 + fail_count = 0 + client = get_typesense_client() + batches = [rfcs] if batchsize is None else batched(rfcs, batchsize) + for batch in batches: + tdoc_batch = [typesense_doc_from_rfc(rfc) for rfc in batch] + results = client.collections[get_collection_name()].documents.import_( + tdoc_batch, {"action": "upsert"} + ) + for tdoc, result in zip(tdoc_batch, results): + if result["success"]: + success_count += 1 + else: + fail_count += 1 + log(f"Failed to index RFC {tdoc['rfcNumber']}: {result['error']}") + log(f"Added {success_count} RFCs to the index, failed to add {fail_count}") + + +DOCS_SCHEMA = { + "enable_nested_fields": True, + "default_sorting_field": "ranking", + "fields": [ + # RFC number in integer form, for sorting asc/desc in search results + # Omit field for drafts { - "api_key": _settings["TYPESENSE_API_KEY"], - "nodes": [_settings["TYPESENSE_API_URL"]], - } - ) - client.collections[_settings["TYPESENSE_COLLECTION_NAME"]].documents.upsert( - {"id": ts_id} | ts_document - ) + "name": "rfcNumber", + "type": "int32", + "facet": False, + "optional": True, + "sort": True, + }, + # RFC number in string form, for direct matching with ranking + # Omit field for drafts + {"name": "rfc", "type": "string", "facet": False, "optional": True}, + # For drafts that correspond to an RFC, insert the RFC number + # Omit field for rfcs or if not relevant + {"name": "ref", "type": "string", "facet": False, "optional": True}, + # Filename of the document (without the extension, e.g. 
"rfc1234" + # or "draft-ietf-abc-def-02") + {"name": "filename", "type": "string", "facet": False, "infix": True}, + # Title of the draft / rfc + {"name": "title", "type": "string", "facet": False}, + # Abstract of the draft / rfc + {"name": "abstract", "type": "string", "facet": False}, + # A list of search keywords if relevant, set to empty array otherwise + {"name": "keywords", "type": "string[]", "facet": True}, + # Type of the document + # Accepted values: "draft" or "rfc" + {"name": "type", "type": "string", "facet": True}, + # State(s) of the document (e.g. "Published", "Adopted by a WG", etc.) + # Use the full name, not the slug + {"name": "state", "type": "string[]", "facet": True, "optional": True}, + # Status (Standard Level Name) + # Object with properties "slug" and "name" + # e.g.: { slug: "std", "name": "Internet Standard" } + {"name": "status", "type": "object", "facet": True, "optional": True}, + # The subseries it is part of. (e.g. "BCP") + # Omit otherwise. + { + "name": "subseries.acronym", + "type": "string", + "facet": True, + "optional": True, + }, + # The subseries number it is part of. (e.g. 123) + # Omit otherwise. + { + "name": "subseries.number", + "type": "int32", + "facet": True, + "sort": True, + "optional": True, + }, + # The total of RFCs in the subseries + # Omit if not part of a subseries + { + "name": "subseries.total", + "type": "int32", + "facet": False, + "sort": False, + "optional": True, + }, + # Date of the document, in unix epoch seconds (can be negative for < 1970) + {"name": "date", "type": "int64", "facet": False}, + # Expiration date of the document, in unix epoch seconds (can be negative + # for < 1970). Omit field for RFCs + {"name": "expires", "type": "int64", "facet": False, "optional": True}, + # Publication date of the RFC, in unix epoch seconds (can be negative + # for < 1970). 
Omit field for drafts + { + "name": "publicationDate", + "type": "int64", + "facet": True, + "optional": True, + }, + # Working Group + # Object with properties "acronym", "name" and "full" + # e.g.: + # { + # "acronym": "ntp", + # "name": "Network Time Protocols", + # "full": "ntp - Network Time Protocols", + # } + {"name": "group", "type": "object", "facet": True, "optional": True}, + # Area + # Object with properties "acronym", "name" and "full" + # e.g.: + # { + # "acronym": "mpls", + # "name": "Multiprotocol Label Switching", + # "full": "mpls - Multiprotocol Label Switching", + # } + {"name": "area", "type": "object", "facet": True, "optional": True}, + # Stream + # Object with properties "slug" and "name" + # e.g.: { slug: "ietf", "name": "IETF" } + {"name": "stream", "type": "object", "facet": True, "optional": True}, + # List of authors + # Array of objects with properties "name" and "affiliation" + # e.g.: + # [ + # {"name": "John Doe", "affiliation": "ACME Inc."}, + # {"name": "Ada Lovelace", "affiliation": "Babbage Corps."}, + # ] + {"name": "authors", "type": "object[]", "facet": True, "optional": True}, + # Area Director Name (e.g. "Leonardo DaVinci") + {"name": "adName", "type": "string", "facet": True, "optional": True}, + # Whether the document should be hidden by default in search results or not. + {"name": "flags.hiddenDefault", "type": "bool", "facet": True}, + # Whether the document is obsoleted by another document or not. + {"name": "flags.obsoleted", "type": "bool", "facet": True}, + # Whether the document is updated by another document or not. + {"name": "flags.updated", "type": "bool", "facet": True}, + # List of documents that obsolete this document. + # Array of strings. Use RFC number for RFCs. (e.g. ["123", "456"]) + # Omit if none. Must be provided if "flags.obsoleted" is set to True. + { + "name": "obsoletedBy", + "type": "string[]", + "facet": False, + "optional": True, + }, + # List of documents that update this document. 
+ # Array of strings. Use RFC number for RFCs. (e.g. ["123", "456"]) + # Omit if none. Must be provided if "flags.updated" is set to True. + {"name": "updatedBy", "type": "string[]", "facet": False, "optional": True}, + # Sanitized content of the document. + # Make sure to remove newlines, double whitespaces, symbols and tags. + { + "name": "content", + "type": "string", + "facet": False, + "optional": True, + "store": False, + }, + # Ranking value to use when no explicit sorting is used during search + # Set to the RFC number for RFCs and the revision number for drafts + # This ensures newer RFCs get listed first in the default search results + # (without a query) + {"name": "ranking", "type": "int32", "facet": False}, + ], +} + + +def create_collection(): + collection_name = get_collection_name() + log(f"Creating '{collection_name}' collection") + client = get_typesense_client() + client.collections.create({"name": get_collection_name()} | DOCS_SCHEMA) + + +def delete_collection(): + collection_name = get_collection_name() + log(f"Deleting '{collection_name}' collection") + client = get_typesense_client() + try: + client.collections[collection_name].delete() + except typesense.exceptions.ObjectNotFound: + pass diff --git a/ietf/utils/tests_searchindex.py b/ietf/utils/tests_searchindex.py index 8740716c85..0bff96ec7d 100644 --- a/ietf/utils/tests_searchindex.py +++ b/ietf/utils/tests_searchindex.py @@ -1,6 +1,7 @@ # Copyright The IETF Trust 2026, All Rights Reserved from unittest import mock +import typesense.exceptions from django.conf import settings from django.test.utils import override_settings @@ -51,42 +52,29 @@ def test_sanitize_text(self): "TYPESENSE_COLLECTION_NAME": "frogs", } ) - @mock.patch("ietf.utils.searchindex.typesense.Client") - def test_update_or_create_rfc_entry(self, mock_ts_client_constructor): + def test_typesense_doc_from_rfc(self): not_rfc = WgDraftFactory() assert isinstance(not_rfc, Document) with self.assertRaises(AssertionError): - 
searchindex.update_or_create_rfc_entry(not_rfc) - self.assertFalse(mock_ts_client_constructor.called) + searchindex.typesense_doc_from_rfc(not_rfc) invalid_rfc = WgRfcFactory(name="rfc1000000", rfc_number=None) assert isinstance(invalid_rfc, Document) with self.assertRaises(AssertionError): - searchindex.update_or_create_rfc_entry(invalid_rfc) - self.assertFalse(mock_ts_client_constructor.called) + searchindex.typesense_doc_from_rfc(invalid_rfc) rfc = PublishedRfcDocEventFactory().doc assert isinstance(rfc, Document) - searchindex.update_or_create_rfc_entry(rfc) - self.assertTrue(mock_ts_client_constructor.called) - # walk the tree down to the method we expected to be called... - mock_upsert = mock_ts_client_constructor.return_value.collections[ - "frogs" - ].documents.upsert # matches value in override_settings above - self.assertTrue(mock_upsert.called) - upserted_dict = mock_upsert.call_args[0][0] + result = searchindex.typesense_doc_from_rfc(rfc) # Check a few values, not exhaustive - self.assertEqual(upserted_dict["id"], f"doc-{rfc.pk}") - self.assertEqual(upserted_dict["rfcNumber"], rfc.rfc_number) - self.assertEqual( - upserted_dict["abstract"], searchindex._sanitize_text(rfc.abstract) - ) - self.assertNotIn("adName", upserted_dict) - self.assertNotIn("content", upserted_dict) # no blob - self.assertNotIn("subseries", upserted_dict) + self.assertEqual(result["id"], f"doc-{rfc.pk}") + self.assertEqual(result["rfcNumber"], rfc.rfc_number) + self.assertEqual(result["abstract"], searchindex._sanitize_text(rfc.abstract)) + self.assertNotIn("adName", result) + self.assertNotIn("content", result) # no blob + self.assertNotIn("subseries", result) # repeat, this time with contents, an AD, and subseries docs - mock_upsert.reset_mock() store_str( kind="rfc", name=f"txt/{rfc.name}.txt", @@ -99,17 +87,15 @@ def test_update_or_create_rfc_entry(self, mock_ts_client_constructor): # (the typesense schema does not support this for real at the moment) 
BcpFactory(contains=[rfc], name="bcp1234") StdFactory(contains=[rfc], name="std1234") - searchindex.update_or_create_rfc_entry(rfc) - self.assertTrue(mock_upsert.called) - upserted_dict = mock_upsert.call_args[0][0] + result = searchindex.typesense_doc_from_rfc(rfc) # Check a few values, not exhaustive self.assertEqual( - upserted_dict["content"], + result["content"], searchindex._sanitize_text("The contents of this RFC"), ) - self.assertEqual(upserted_dict["adName"], "Alfred D. Rector") - self.assertIn("subseries", upserted_dict) - ss_dict = upserted_dict["subseries"] + self.assertEqual(result["adName"], "Alfred D. Rector") + self.assertIn("subseries", result) + ss_dict = result["subseries"] # We should get one of the two subseries docs, but neither is more correct # than the other... self.assertTrue( @@ -119,10 +105,108 @@ def test_update_or_create_rfc_entry(self, mock_ts_client_constructor): ) ) - # Finally, delete the contents blob and make sure things don't blow up - mock_upsert.reset_mock() + # Finally, delete the contents blob and make sure things don't blow up Blob.objects.get(bucket="rfc", name=f"txt/{rfc.name}.txt").delete() + result = searchindex.typesense_doc_from_rfc(rfc) + self.assertNotIn("content", result) + + @override_settings( + SEARCHINDEX_CONFIG={ + "TYPESENSE_API_URL": "http://ts.example.com", + "TYPESENSE_API_KEY": "test-api-key", + "TYPESENSE_COLLECTION_NAME": "frogs", + } + ) + @mock.patch("ietf.utils.searchindex.typesense_doc_from_rfc") + @mock.patch("ietf.utils.searchindex.typesense.Client") + def test_update_or_create_rfc_entry( + self, mock_ts_client_constructor, mock_tdoc_from_rfc + ): + fake_tdoc = object() + mock_tdoc_from_rfc.return_value = fake_tdoc + rfc = WgRfcFactory() + assert isinstance(rfc, Document) searchindex.update_or_create_rfc_entry(rfc) + self.assertTrue(mock_ts_client_constructor.called) + # walk the tree down to the method we expected to be called... 
+ mock_upsert = mock_ts_client_constructor.return_value.collections[ + "frogs" # matches value in override_settings above + ].documents.upsert self.assertTrue(mock_upsert.called) - upserted_dict = mock_upsert.call_args[0][0] - self.assertNotIn("content", upserted_dict) + self.assertEqual(mock_upsert.call_args, mock.call(fake_tdoc)) + + @override_settings( + SEARCHINDEX_CONFIG={ + "TYPESENSE_API_URL": "http://ts.example.com", + "TYPESENSE_API_KEY": "test-api-key", + "TYPESENSE_COLLECTION_NAME": "frogs", + } + ) + @mock.patch("ietf.utils.searchindex.typesense_doc_from_rfc") + @mock.patch("ietf.utils.searchindex.typesense.Client") + def test_update_or_create_rfc_entries( + self, mock_ts_client_constructor, mock_tdoc_from_rfc + ): + fake_tdoc = object() + mock_tdoc_from_rfc.return_value = fake_tdoc + rfc = WgRfcFactory() + assert isinstance(rfc, Document) + searchindex.update_or_create_rfc_entries([rfc] * 50) # list of docs... + self.assertEqual(mock_ts_client_constructor.call_count, 1) + # walk the tree down to the method we expected to be called... + mock_import_ = mock_ts_client_constructor.return_value.collections[ + "frogs" # matches value in override_settings above + ].documents.import_ + self.assertEqual(mock_import_.call_count, 1) + self.assertEqual( + mock_import_.call_args, mock.call([fake_tdoc] * 50, {"action": "upsert"}) + ) + + mock_import_.reset_mock() + searchindex.update_or_create_rfc_entries([rfc] * 50, batchsize=20) + self.assertEqual(mock_ts_client_constructor.call_count, 2) # one more + # walk the tree down to the method we expected to be called... 
+ mock_import_ = mock_ts_client_constructor.return_value.collections[ + "frogs" # matches value in override_settings above + ].documents.import_ + self.assertEqual(mock_import_.call_count, 3) + self.assertEqual( + mock_import_.call_args_list, + [ + mock.call([fake_tdoc] * 20, {"action": "upsert"}), + mock.call([fake_tdoc] * 20, {"action": "upsert"}), + mock.call([fake_tdoc] * 10, {"action": "upsert"}), + ], + ) + + @override_settings( + SEARCHINDEX_CONFIG={ + "TYPESENSE_API_URL": "http://ts.example.com", + "TYPESENSE_API_KEY": "test-api-key", + "TYPESENSE_COLLECTION_NAME": "frogs", + } + ) + @mock.patch("ietf.utils.searchindex.typesense.Client") + def test_create_collection(self, mock_ts_client_constructor): + searchindex.create_collection() + self.assertEqual(mock_ts_client_constructor.call_count, 1) + mock_collections = mock_ts_client_constructor.return_value.collections + self.assertTrue(mock_collections.create.called) + self.assertEqual(mock_collections.create.call_args[0][0]["name"], "frogs") + + @override_settings( + SEARCHINDEX_CONFIG={ + "TYPESENSE_API_URL": "http://ts.example.com", + "TYPESENSE_API_KEY": "test-api-key", + "TYPESENSE_COLLECTION_NAME": "frogs", + } + ) + @mock.patch("ietf.utils.searchindex.typesense.Client") + def test_delete_collection(self, mock_ts_client_constructor): + searchindex.delete_collection() + self.assertEqual(mock_ts_client_constructor.call_count, 1) + mock_collections = mock_ts_client_constructor.return_value.collections + self.assertTrue(mock_collections["frogs"].delete.called) + + mock_collections["frogs"].side_effect = typesense.exceptions.ObjectNotFound + searchindex.delete_collection() # should ignore the exception From c4cb8b91fc9434a3bb3419acfac2dd3b30cb4a6c Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Fri, 17 Apr 2026 07:33:51 -0300 Subject: [PATCH 23/27] fix: add pages to typesense schema (#10726) --- ietf/utils/searchindex.py | 4 ++++ ietf/utils/tests_searchindex.py | 1 + 2 files changed, 5 insertions(+) 
diff --git a/ietf/utils/searchindex.py b/ietf/utils/searchindex.py index a47e6d2f12..87951abb60 100644 --- a/ietf/utils/searchindex.py +++ b/ietf/utils/searchindex.py @@ -86,6 +86,7 @@ def _sanitize_text(content): def typesense_doc_from_rfc(rfc: Document) -> DocumentSchema: assert rfc.type_id == "rfc" assert rfc.rfc_number is not None + assert rfc.pages is not None keywords: list[str] = rfc.keywords # help type checking @@ -119,6 +120,7 @@ def typesense_doc_from_rfc(rfc: Document) -> DocumentSchema: "filename": rfc.name, "title": rfc.title, "abstract": _sanitize_text(rfc.abstract), + "pages": rfc.pages, "keywords": keywords, "type": "rfc", "state": [state.name for state in rfc.states.all()], @@ -231,6 +233,8 @@ def update_or_create_rfc_entries( {"name": "title", "type": "string", "facet": False}, # Abstract of the draft / rfc {"name": "abstract", "type": "string", "facet": False}, + # Number of pages + {"name": "pages", "type": "int32", "facet": False}, # A list of search keywords if relevant, set to empty array otherwise {"name": "keywords", "type": "string[]", "facet": True}, # Type of the document diff --git a/ietf/utils/tests_searchindex.py b/ietf/utils/tests_searchindex.py index 0bff96ec7d..e9fbf52020 100644 --- a/ietf/utils/tests_searchindex.py +++ b/ietf/utils/tests_searchindex.py @@ -70,6 +70,7 @@ def test_typesense_doc_from_rfc(self): self.assertEqual(result["id"], f"doc-{rfc.pk}") self.assertEqual(result["rfcNumber"], rfc.rfc_number) self.assertEqual(result["abstract"], searchindex._sanitize_text(rfc.abstract)) + self.assertEqual(result["pages"], rfc.pages) self.assertNotIn("adName", result) self.assertNotIn("content", result) # no blob self.assertNotIn("subseries", result) From 629ffb13480201e25fc5d941cfcea9de123562f9 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Fri, 17 Apr 2026 15:04:23 -0300 Subject: [PATCH 24/27] fix: decode non-utf-8 blob content (#10729) * refactor: decode_document_content() utility method * fix: fall back to latin-1 in 
retrieve_str() * refactor: match structure with retrieve_bytes() * refactor: separate tests_text.py module * test: test_decode_document_content + ruff * fix: revert misguided refactor * test: assert to guarantee test is valid --- ietf/doc/models.py | 15 ++------- ietf/doc/storage_utils.py | 47 +++++++++++++------------- ietf/utils/tests.py | 19 ----------- ietf/utils/tests_text.py | 71 +++++++++++++++++++++++++++++++++++++++ ietf/utils/text.py | 18 ++++++++++ 5 files changed, 114 insertions(+), 56 deletions(-) create mode 100644 ietf/utils/tests_text.py diff --git a/ietf/doc/models.py b/ietf/doc/models.py index 972f0a34e8..cc79b73831 100644 --- a/ietf/doc/models.py +++ b/ietf/doc/models.py @@ -52,6 +52,7 @@ from ietf.person.utils import get_active_balloters from ietf.utils import log from ietf.utils.decorators import memoize +from ietf.utils.text import decode_document_content from ietf.utils.validators import validate_no_control_chars from ietf.utils.mail import formataddr from ietf.utils.models import ForeignKey @@ -640,19 +641,7 @@ def text(self, size = -1): except IOError as e: log.log(f"Error reading text for {path}: {e}") return None - text = None - try: - text = raw.decode('utf-8') - except UnicodeDecodeError: - for back in range(1,4): - try: - text = raw[:-back].decode('utf-8') - break - except UnicodeDecodeError: - pass - if text is None: - text = raw.decode('latin-1') - return text + return decode_document_content(raw) def text_or_error(self): return self.text() or "Error; cannot read '%s'"%self.get_base_name() diff --git a/ietf/doc/storage_utils.py b/ietf/doc/storage_utils.py index ffdd4599be..9c18bb8a8a 100644 --- a/ietf/doc/storage_utils.py +++ b/ietf/doc/storage_utils.py @@ -10,6 +10,7 @@ from django.core.files.storage import storages, Storage from ietf.utils.log import log +from ietf.utils.text import decode_document_content class StorageUtilsError(Exception): @@ -164,32 +165,30 @@ def store_str( def retrieve_bytes(kind: str, name: str) -> bytes: 
from ietf.doc.storage import maybe_log_timing - content = b"" - if settings.ENABLE_BLOBSTORAGE: - try: - store = _get_storage(kind) - with store.open(name) as f: - with maybe_log_timing( - hasattr(store, "ietf_log_blob_timing") and store.ietf_log_blob_timing, - "read", - bucket_name=store.bucket_name if hasattr(store, "bucket_name") else "", - name=name, - ): - content = f.read() - except Exception as err: - log(f"Blobstore Error: Failed to read bytes from {kind}:{name}: {repr(err)}") - raise + if not settings.ENABLE_BLOBSTORAGE: + return b"" + try: + store = _get_storage(kind) + with store.open(name) as f: + with maybe_log_timing( + hasattr(store, "ietf_log_blob_timing") and store.ietf_log_blob_timing, + "read", + bucket_name=store.bucket_name if hasattr(store, "bucket_name") else "", + name=name, + ): + content = f.read() + except Exception as err: + log(f"Blobstore Error: Failed to read bytes from {kind}:{name}: {repr(err)}") + raise return content def retrieve_str(kind: str, name: str) -> str: - content = "" - if settings.ENABLE_BLOBSTORAGE: - try: - content_bytes = retrieve_bytes(kind, name) - # TODO-BLOBSTORE: try to decode all the different ways doc.text() does - content = content_bytes.decode("utf-8") - except Exception as err: - log(f"Blobstore Error: Failed to read string from {kind}:{name}: {repr(err)}") - raise + if not settings.ENABLE_BLOBSTORAGE: + return "" + try: + content = decode_document_content(retrieve_bytes(kind, name)) + except Exception as err: + log(f"Blobstore Error: Failed to read string from {kind}:{name}: {repr(err)}") + raise return content diff --git a/ietf/utils/tests.py b/ietf/utils/tests.py index 3288309095..99c33f34b3 100644 --- a/ietf/utils/tests.py +++ b/ietf/utils/tests.py @@ -60,7 +60,6 @@ set_url_coverage, ) from ietf.utils.test_utils import TestCase, unicontent -from ietf.utils.text import parse_unicode from ietf.utils.timezone import timezone_not_near_midnight from ietf.utils.xmldraft import XMLDraft, InvalidMetadataError, 
capture_xml2rfc_output @@ -864,24 +863,6 @@ def test_assertion(self): assertion('False') settings.SERVER_MODE = 'test' -class TestRFC2047Strings(TestCase): - def test_parse_unicode(self): - names = ( - ('=?utf-8?b?4Yuz4YuK4Ym1IOGJoOGJgOGIiA==?=', 'ዳዊት በቀለ'), - ('=?utf-8?b?5Li9IOmDnA==?=', '丽 郜'), - ('=?utf-8?b?4KSV4KSu4KWN4KSs4KWL4KScIOCkoeCkvuCksA==?=', 'कम्बोज डार'), - ('=?utf-8?b?zpfPgc6szrrOu861zrnOsSDOm865z4zOvc+Ezrc=?=', 'Ηράκλεια Λιόντη'), - ('=?utf-8?b?15nXqdeo15DXnCDXqNeV15bXoNek15zXkw==?=', 'ישראל רוזנפלד'), - ('=?utf-8?b?5Li95Y2OIOeahw==?=', '丽华 皇'), - ('=?utf-8?b?77ul77qu766V77qzIO+tlu+7ru+vvu+6ju+7pw==?=', 'ﻥﺮﮕﺳ ﭖﻮﯾﺎﻧ'), - ('=?utf-8?b?77uh77uu77qz77uu76++IO+6su+7tO+7p++6jSDvurDvu6Pvuo7vu6jvr74=?=', 'ﻡﻮﺳﻮﯾ ﺲﻴﻧﺍ ﺰﻣﺎﻨﯾ'), - ('=?utf-8?b?ScOxaWdvIFNhbsOnIEliw6HDsWV6IGRlIGxhIFBlw7Fh?=', 'Iñigo Sanç Ibáñez de la Peña'), - ('Mart van Oostendorp', 'Mart van Oostendorp'), - ('', ''), - ) - for encoded_str, unicode in names: - self.assertEqual(unicode, parse_unicode(encoded_str)) - class TestAndroidSiteManifest(TestCase): def test_manifest(self): r = self.client.get(urlreverse('site.webmanifest')) diff --git a/ietf/utils/tests_text.py b/ietf/utils/tests_text.py new file mode 100644 index 0000000000..51aa2eff13 --- /dev/null +++ b/ietf/utils/tests_text.py @@ -0,0 +1,71 @@ +# Copyright The IETF Trust 2021-2026, All Rights Reserved +from ietf.utils.test_utils import TestCase +from ietf.utils.text import parse_unicode, decode_document_content + + +class TestDecoders(TestCase): + def test_parse_unicode(self): + names = ( + ("=?utf-8?b?4Yuz4YuK4Ym1IOGJoOGJgOGIiA==?=", "ዳዊት በቀለ"), + ("=?utf-8?b?5Li9IOmDnA==?=", "丽 郜"), + ("=?utf-8?b?4KSV4KSu4KWN4KSs4KWL4KScIOCkoeCkvuCksA==?=", "कम्बोज डार"), + ("=?utf-8?b?zpfPgc6szrrOu861zrnOsSDOm865z4zOvc+Ezrc=?=", "Ηράκλεια Λιόντη"), + ("=?utf-8?b?15nXqdeo15DXnCDXqNeV15bXoNek15zXkw==?=", "ישראל רוזנפלד"), + ("=?utf-8?b?5Li95Y2OIOeahw==?=", "丽华 皇"), + ("=?utf-8?b?77ul77qu766V77qzIO+tlu+7ru+vvu+6ju+7pw==?=", "ﻥﺮﮕﺳ ﭖﻮﯾﺎﻧ"), + ( + 
"=?utf-8?b?77uh77uu77qz77uu76++IO+6su+7tO+7p++6jSDvurDvu6Pvuo7vu6jvr74=?=", + "ﻡﻮﺳﻮﯾ ﺲﻴﻧﺍ ﺰﻣﺎﻨﯾ", + ), + ( + "=?utf-8?b?ScOxaWdvIFNhbsOnIEliw6HDsWV6IGRlIGxhIFBlw7Fh?=", + "Iñigo Sanç Ibáñez de la Peña", + ), + ("Mart van Oostendorp", "Mart van Oostendorp"), + ("", ""), + ) + for encoded_str, unicode in names: + self.assertEqual(unicode, parse_unicode(encoded_str)) + + def test_decode_document_content(self): + utf8_bytes = "𒀭𒊩𒌆𒄈𒋢".encode("utf-8") # ends with 4-byte character + latin1_bytes = "àéîøü".encode("latin-1") + other_bytes = "àéîøü".encode("macintosh") # different from its latin-1 encoding + assert other_bytes.decode("macintosh") != other_bytes.decode("latin-1"),\ + "test broken: other_bytes must decode differently as latin-1" + + # simplest case + self.assertEqual( + decode_document_content(utf8_bytes), + utf8_bytes.decode(), + ) + # losing 1-4 bytes from the end leave the last character incomplete; the + # decoder should decode all but that last character + self.assertEqual( + decode_document_content(utf8_bytes[:-1]), + utf8_bytes.decode()[:-1], + ) + self.assertEqual( + decode_document_content(utf8_bytes[:-2]), + utf8_bytes.decode()[:-1], + ) + self.assertEqual( + decode_document_content(utf8_bytes[:-3]), + utf8_bytes.decode()[:-1], + ) + self.assertEqual( + decode_document_content(utf8_bytes[:-4]), + utf8_bytes.decode()[:-1], + ) + + # latin-1 is also simple + self.assertEqual( + decode_document_content(latin1_bytes), + latin1_bytes.decode("latin-1"), + ) + + # other character sets are just treated as latin1 (bug? feature? 
you decide) + self.assertEqual( + decode_document_content(other_bytes), + other_bytes.decode("latin-1"), + ) diff --git a/ietf/utils/text.py b/ietf/utils/text.py index 590ec3fd30..2763056e1a 100644 --- a/ietf/utils/text.py +++ b/ietf/utils/text.py @@ -263,3 +263,21 @@ def parse_unicode(text): else: text = decoded_string return text + + +def decode_document_content(content: bytes) -> str: + """Decode document contents as utf-8 or latin1 + + Method was developed in DocumentInfo.text() where it gave acceptable results + for existing documents / RFCs. + """ + try: + return content.decode("utf-8") + except UnicodeDecodeError: + pass + for back in range(1, 4): + try: + return content[:-back].decode("utf-8") + except UnicodeDecodeError: + pass + return content.decode("latin-1") # everything is legal in latin-1 From 63a69945ab11b1c3b3ec490fb260073c90eed0bc Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Fri, 17 Apr 2026 16:24:18 -0500 Subject: [PATCH 25/27] test: Squash some transient test error vectors (#10730) * test: enforce queryset order assumed by test * test: match html escaping in test * test: search more specifically for tokens to avoid mis-reading them when they occur in faker data --- ietf/group/tests_review.py | 30 +++++++++++++------------- ietf/meeting/tests_session_requests.py | 2 +- ietf/meeting/tests_views.py | 7 +++--- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/ietf/group/tests_review.py b/ietf/group/tests_review.py index 89c755bb26..bb9b79a416 100644 --- a/ietf/group/tests_review.py +++ b/ietf/group/tests_review.py @@ -888,10 +888,10 @@ def test_requests_history_filter_page(self): self.assertEqual(r.status_code, 200) self.assertContains(r, review_req.doc.name) self.assertContains(r, review_req2.doc.name) - self.assertContains(r, 'Assigned') - self.assertContains(r, 'Accepted') - self.assertContains(r, 'Completed') - self.assertContains(r, 'Ready') + self.assertContains(r, 'data-text="Assigned"') + self.assertContains(r, 
'data-text="Accepted"') + self.assertContains(r, 'data-text="Completed"') + self.assertContains(r, 'data-text="Ready"') self.assertContains(r, escape(assignment.reviewer.person.name)) self.assertContains(r, escape(assignment2.reviewer.person.name)) @@ -907,10 +907,10 @@ def test_requests_history_filter_page(self): self.assertEqual(r.status_code, 200) self.assertContains(r, review_req.doc.name) self.assertNotContains(r, review_req2.doc.name) - self.assertContains(r, 'Assigned') - self.assertNotContains(r, 'Accepted') - self.assertNotContains(r, 'Completed') - self.assertNotContains(r, 'Ready') + self.assertContains(r, 'data-text="Assigned"') + self.assertNotContains(r, 'data-text="Accepted"') + self.assertNotContains(r, 'data-text="Completed"') + self.assertNotContains(r, 'data-text="Ready"') self.assertContains(r, escape(assignment.reviewer.person.name)) self.assertNotContains(r, escape(assignment2.reviewer.person.name)) @@ -926,10 +926,10 @@ def test_requests_history_filter_page(self): self.assertEqual(r.status_code, 200) self.assertNotContains(r, review_req.doc.name) self.assertContains(r, review_req2.doc.name) - self.assertNotContains(r, 'Assigned') - self.assertContains(r, 'Accepted') - self.assertContains(r, 'Completed') - self.assertContains(r, 'Ready') + self.assertNotContains(r, 'data-text="Assigned"') + self.assertContains(r, 'data-text="Accepted"') + self.assertContains(r, 'data-text="Completed"') + self.assertContains(r, 'data-text="Ready"') self.assertNotContains(r, escape(assignment.reviewer.person.name)) self.assertContains(r, escape(assignment2.reviewer.person.name)) @@ -940,9 +940,9 @@ def test_requests_history_filter_page(self): r = self.client.get(url) self.assertEqual(r.status_code, 200) self.assertNotContains(r, review_req.doc.name) - self.assertNotContains(r, 'Assigned') - self.assertNotContains(r, 'Accepted') - self.assertNotContains(r, 'Completed') + self.assertNotContains(r, 'data-text="Assigned"') + self.assertNotContains(r, 
'data-text="Accepted"') + self.assertNotContains(r, 'data-text="Completed"') def test_requests_history_invalid_filter_parameters(self): # First assignment as assigned diff --git a/ietf/meeting/tests_session_requests.py b/ietf/meeting/tests_session_requests.py index 0cb092d2f8..42dbee5f23 100644 --- a/ietf/meeting/tests_session_requests.py +++ b/ietf/meeting/tests_session_requests.py @@ -236,7 +236,7 @@ def test_edit(self): self.assertRedirects(r, redirect_url) # Check whether updates were stored in the database - sessions = Session.objects.filter(meeting=meeting, group=mars) + sessions = Session.objects.filter(meeting=meeting, group=mars).order_by("id") self.assertEqual(len(sessions), 2) session = sessions[0] self.assertFalse(session.constraints().filter(name='time_relation')) diff --git a/ietf/meeting/tests_views.py b/ietf/meeting/tests_views.py index 258ffe554c..17988e50be 100644 --- a/ietf/meeting/tests_views.py +++ b/ietf/meeting/tests_views.py @@ -33,6 +33,7 @@ from django.http import QueryDict, FileResponse from django.template import Context, Template from django.utils import timezone +from django.utils.html import escape from django.utils.safestring import mark_safe from django.utils.text import slugify @@ -9491,7 +9492,7 @@ def test_session_attendance(self): self.assertEqual(r.status_code, 200) self.assertContains(r, '3 attendees') for person in persons: - self.assertContains(r, person.plain_name()) + self.assertContains(r, escape(person.plain_name())) # Test for the "I was there" button. 
def _test_button(person, expected): @@ -9511,14 +9512,14 @@ def _test_button(person, expected): # attempt to POST anyway is ignored r = self.client.post(attendance_url) self.assertEqual(r.status_code, 200) - self.assertNotContains(r, persons[3].plain_name()) + self.assertNotContains(r, escape(persons[3].plain_name())) self.assertEqual(session.attended_set.count(), 3) # button is shown, and POST is accepted meeting.importantdate_set.update(name_id='revsub',date=date_today() + datetime.timedelta(days=20)) _test_button(persons[3], True) r = self.client.post(attendance_url) self.assertEqual(r.status_code, 200) - self.assertContains(r, persons[3].plain_name()) + self.assertContains(r, escape(persons[3].plain_name())) self.assertEqual(session.attended_set.count(), 4) # When the meeting is finalized, a bluesheet file is generated, From dc49dc8362812893cad560feecc55efcea1553dc Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Mon, 20 Apr 2026 14:29:41 -0300 Subject: [PATCH 26/27] chore: beat termination grace period -> 10 s (#10741) --- k8s/beat.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/k8s/beat.yaml b/k8s/beat.yaml index 9ab242681c..b4291c7e31 100644 --- a/k8s/beat.yaml +++ b/k8s/beat.yaml @@ -59,4 +59,4 @@ spec: name: files-cfgmap dnsPolicy: ClusterFirst restartPolicy: Always - terminationGracePeriodSeconds: 600 + terminationGracePeriodSeconds: 10 From 4d69329ef86054fa5bfb9da9acd0c966ab013d8f Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Mon, 20 Apr 2026 23:59:36 -0300 Subject: [PATCH 27/27] chore: remove blobdb profiling logs (#10732) These are not useful any more, blobdb is fast --- ietf/doc/storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ietf/doc/storage.py b/ietf/doc/storage.py index 375620ccaf..ee1e76c4fa 100644 --- a/ietf/doc/storage.py +++ b/ietf/doc/storage.py @@ -114,7 +114,6 @@ def _get_write_parameters(self, name, content=None): class StoredObjectBlobdbStorage(BlobdbStorage): - ietf_log_blob_timing = True 
warn_if_missing = True # TODO-BLOBSTORE make this configurable (or remove it) def _save_stored_object(self, name, content) -> StoredObject: