diff --git a/ietf/sync/rfceditor.py b/ietf/sync/rfceditor.py index 889317cdcf..a3c6580452 100644 --- a/ietf/sync/rfceditor.py +++ b/ietf/sync/rfceditor.py @@ -1,4 +1,4 @@ -# Copyright The IETF Trust 2012-2020, All Rights Reserved +# Copyright The IETF Trust 2012-2025, All Rights Reserved # -*- coding: utf-8 -*- @@ -12,6 +12,7 @@ from xml.dom import pulldom, Node from django.conf import settings +from django.db import transaction from django.db.models import Subquery, OuterRef, F, Q from django.utils import timezone from django.utils.encoding import smart_bytes, force_str @@ -30,9 +31,9 @@ from ietf.utils.mail import send_mail_text from ietf.utils.timezone import datetime_from_date, RPC_TZINFO -#QUEUE_URL = "https://www.rfc-editor.org/queue2.xml" -#INDEX_URL = "https://www.rfc-editor.org/rfc/rfc-index.xml" -#POST_APPROVED_DRAFT_URL = "https://www.rfc-editor.org/sdev/jsonexp/jsonparser.php" +# QUEUE_URL = "https://www.rfc-editor.org/queue2.xml" +# INDEX_URL = "https://www.rfc-editor.org/rfc/rfc-index.xml" +# POST_APPROVED_DRAFT_URL = "https://www.rfc-editor.org/sdev/jsonexp/jsonparser.php" MIN_ERRATA_RESULTS = 5000 MIN_INDEX_RESULTS = 8000 @@ -427,7 +428,7 @@ def update_docs_from_rfc_index( pass # Logging below warning turns out to be unhelpful - there are many references # to such things in the index: - # * all april-1 RFCs have an internal name that looks like a draft name, but there + # * all april-1 RFCs have an internal name that looks like a draft name, but there # was never such a draft. More of these will exist in the future # * Several documents were created with out-of-band input to the RFC-editor, for a # variety of reasons. @@ -436,7 +437,7 @@ def update_docs_from_rfc_index( # If there is no draft to point to, don't point to one, even if there was an RPC # internal name in use (and in the RPC database). This will be a requirement on the # reimplementation of the creation of the rfc-index. - # + # # log(f"Warning: RFC index for {rfc_number} referred to unknown draft {draft_name}") # Find or create the RFC document @@ -466,7 +467,7 @@ def update_docs_from_rfc_index( if draft: doc.formal_languages.set(draft.formal_languages.all()) for author in draft.documentauthor_set.all(): - # Copy the author but point at the new doc. + # Copy the author but point at the new doc. # See https://docs.djangoproject.com/en/4.2/topics/db/queries/#copying-model-instances author.pk = None author.id = None @@ -707,12 +708,27 @@ def parse_relation_list(l): subseries_doc.docevent_set.create(type="sync_from_rfc_editor", by=system, desc=f"Added {doc.name} to {subseries_doc.name}") rfc_events.append(doc.docevent_set.create(type="sync_from_rfc_editor", by=system, desc=f"Added {doc.name} to {subseries_doc.name}")) - for subdoc in doc.related_that("contains"): - if subdoc.name not in also: - assert(not first_sync_creating_subseries) - subseries_doc.relateddocument_set.filter(target=subdoc).delete() - rfc_events.append(doc.docevent_set.create(type="sync_from_rfc_editor", by=system, desc=f"Removed {doc.name} from {subseries_doc.name}")) - subseries_doc.docevent_set.create(type="sync_from_rfc_editor", by=system, desc=f"Removed {doc.name} from {subseries_doc.name}") + # Delete subseries relations that are no longer current. Use a transaction + # so we are sure we iterate over the same relations that we delete! + with transaction.atomic(): + stale_subseries_relations = doc.relations_that("contains").exclude( + source__name__in=also + ) + for stale_relation in stale_subseries_relations: + stale_subseries_doc = stale_relation.source + rfc_events.append( + doc.docevent_set.create( + type="sync_from_rfc_editor", + by=system, + desc=f"Removed {doc.name} from {stale_subseries_doc.name}", + ) + ) + stale_subseries_doc.docevent_set.create( + type="sync_from_rfc_editor", + by=system, + desc=f"Removed {doc.name} from {stale_subseries_doc.name}", + ) + stale_subseries_relations.delete() doc_errata = errata.get(f"RFC{rfc_number}", []) all_rejected = doc_errata and all( @@ -754,9 +770,9 @@ def parse_relation_list(l): ) doc.save_with_history(rfc_events) yield rfc_number, rfc_changes, doc, rfc_published # yield changes to the RFC - + if first_sync_creating_subseries: - # First - create the known subseries documents that have ghosted. + # First - create the known subseries documents that have ghosted. # The RFC editor (as of 31 Oct 2023) claims these subseries docs do not exist. # The datatracker, on the other hand, will say that the series doc currently contains no RFCs. for name in ["fyi17", "std1", "bcp12", "bcp113", "bcp66"]: @@ -769,7 +785,6 @@ def parse_relation_list(l): subseries_slug = name[:3] subseries_doc.docevent_set.create(type=f"{subseries_slug}_history_marker", by=system, desc=f"No history of this {subseries_slug.upper()} document is currently available in the datatracker before this point") - RelatedDocument.objects.filter( Q(originaltargetaliasname__startswith="bcp") | Q(originaltargetaliasname__startswith="std") | diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index b0cdf863f0..14d65de0b2 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -20,7 +20,13 @@ import debug # pyflakes:ignore from ietf.api.views import EmailIngestionError -from ietf.doc.factories import WgDraftFactory, RfcFactory, DocumentAuthorFactory, DocEventFactory +from ietf.doc.factories import ( + WgDraftFactory, + RfcFactory, + DocumentAuthorFactory, + DocEventFactory, + BcpFactory, +) from ietf.doc.models import Document, DocEvent, DeletedEvent, DocTagName, RelatedDocument, State, StateDocEvent from ietf.doc.utils import add_state_change_event from ietf.group.factories import GroupFactory @@ -508,6 +514,120 @@ def test_rfc_index(self): changed = list(rfceditor.update_docs_from_rfc_index(data, errata, today - datetime.timedelta(days=30))) self.assertEqual(len(changed), 0) + def test_rfc_index_subseries_replacement(self): + today = date_today() + author = PersonFactory(name="Some Bozo") + + # Start with two BCPs, each containing an rfc + rfc1, rfc2, rfc3 = RfcFactory.create_batch(3, authors=[author]) + bcp1 = BcpFactory(contains=[rfc1]) + bcp2 = BcpFactory(contains=[rfc2]) + + def _nameify(doc): + """Convert a name like 'rfc1' to 'RFC0001""" + return f"{doc.name[:3].upper()}{int(doc.name[3:]):04d}" + + # RFC index that replaces rfc2 with rfc3 in bcp2 + index_xml = f""" + + + {_nameify(bcp1)} + + {_nameify(rfc1)} + + + + {_nameify(bcp2)} + + {_nameify(rfc3)} + + + + {_nameify(rfc1)} + {rfc1.title} + + Some Bozo + + + {today.strftime('%B')} + {today.strftime('%Y')} + + + ASCII + + 42 + + test + +

This is some interesting text.

+ + {_nameify(bcp1)} + + PROPOSED STANDARD + PROPOSED STANDARD + IETF +
+ + {_nameify(rfc2)} + {rfc2.title} + + Some Bozo + + + {today.strftime('%B')} + {today.strftime('%Y')} + + + ASCII + + 42 + + test + +

This is some interesting text.

+ PROPOSED STANDARD + PROPOSED STANDARD + IETF +
+ + {_nameify(rfc3)} + {rfc3.title} + + Some Bozo + + + {today.strftime('%B')} + {today.strftime('%Y')} + + + ASCII + + 42 + + test + +

This is some interesting text.

+ + {_nameify(bcp2)} + + PROPOSED STANDARD + PROPOSED STANDARD + IETF +
+
""" + data = rfceditor.parse_index(io.StringIO(index_xml)) # parse index + self.assertEqual(len(data), 3) # check that we parsed 3 RFCs + # Process the data by consuming the generator + for _ in rfceditor.update_docs_from_rfc_index(data, []): + pass + # Confirm that the expected changes were made + self.assertCountEqual(rfc1.related_that("contains"), [bcp1]) + self.assertCountEqual(rfc2.related_that("contains"), []) + self.assertCountEqual(rfc3.related_that("contains"), [bcp2]) + def _generate_rfc_queue_xml(self, draft, state, auth48_url=None): """Generate an RFC queue xml string for a draft""" t = '''