From 0fe97951230ef879102e2f7bd30bf355f93cb8b1 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Tue, 13 Jan 2026 12:55:55 -0600 Subject: [PATCH 01/25] feat: rsync rfc content, store in blob, rebuild references --- ietf/doc/tasks.py | 22 +++++++++++ ietf/doc/utils.py | 48 +++++++++++++++-------- ietf/settings.py | 1 + ietf/sync/rfceditor.py | 3 +- ietf/sync/tasks.py | 87 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 144 insertions(+), 17 deletions(-) diff --git a/ietf/doc/tasks.py b/ietf/doc/tasks.py index 4f7fe37782..41f191fcc9 100644 --- a/ietf/doc/tasks.py +++ b/ietf/doc/tasks.py @@ -3,6 +3,7 @@ # Celery task definitions # import datetime +from typing import List import debug # pyflakes:ignore from celery import shared_task @@ -29,6 +30,7 @@ from .utils import ( generate_idnits2_rfc_status, generate_idnits2_rfcs_obsoleted, + rebuild_reference_relations, update_or_create_draft_bibxml_file, ensure_draft_bibxml_path_exists, investigate_fragment, @@ -128,3 +130,23 @@ def investigate_fragment_task(name_fragment: str): "name_fragment": name_fragment, "results": investigate_fragment(name_fragment), } + +@shared_task +def rebuild_reference_relations_task(doc_names:List[str]): + log.log("Task: Rebuilding reference relations for"+str(doc_names)) + for doc in Document.objects.filter(name__in=doc_names, type__in=["rfc", "draft"]): + filenames = dict() + base = ( + settings.RFC_PATH + if doc.type_id == "rfc" + else settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR + ) + stem = doc.name if doc.type_id == "rfc" else f"{doc.name}-{doc.rev}" + for ext in ["xml", "txt"]: + path = Path(base) / f"{stem}.{ext}" + if path.is_file(): + filenames[ext] = str(path) + if len(filenames) > 0: + rebuild_reference_relations(doc, filenames) + else: + log.log(f"Found no content for {stem}") diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index 2bd9a3d314..e65ac30187 100644 --- a/ietf/doc/utils.py +++ b/ietf/doc/utils.py @@ -816,50 +816,66 @@ def rebuild_reference_relations(doc, 
filenames): filenames should be a dict mapping file ext (i.e., type) to the full path of each file. """ - if doc.type.slug != 'draft': + if doc.type.slug not in ["draft", "rfc"]: return None + + log.log(f"Rebuilding reference relations for {doc.name}") # try XML first - if 'xml' in filenames: - refs = XMLDraft(filenames['xml']).get_refs() - elif 'txt' in filenames: - filename = filenames['txt'] + if "xml" in filenames: + refs = XMLDraft(filenames["xml"]).get_refs() + elif "txt" in filenames: + filename = filenames["txt"] try: refs = draft.PlaintextDraft.from_file(filename).get_refs() except IOError as e: - return { 'errors': ["%s :%s" % (e.strerror, filename)] } + return {"errors": ["%s :%s" % (e.strerror, filename)]} else: - return {'errors': ['No Internet-Draft text available for rebuilding reference relations. Need XML or plaintext.']} + return { + "errors": [ + "No Internet-Draft text available for rebuilding reference relations. Need XML or plaintext." + ] + } - doc.relateddocument_set.filter(relationship__slug__in=['refnorm','refinfo','refold','refunk']).delete() + doc.relateddocument_set.filter( + relationship__slug__in=["refnorm", "refinfo", "refold", "refunk"] + ).delete() warnings = [] errors = [] unfound = set() - for ( ref, refType ) in refs.items(): + for ref, refType in refs.items(): refdoc = Document.objects.filter(name=ref) if not refdoc and re.match(r"^draft-.*-\d{2}$", ref): refdoc = Document.objects.filter(name=ref[:-3]) count = refdoc.count() if count == 0: - unfound.add( "%s" % ref ) + unfound.add("%s" % ref) continue elif count > 1: - errors.append("Too many Document objects found for %s"%ref) + errors.append("Too many Document objects found for %s" % ref) else: # Don't add references to ourself if doc != refdoc[0]: - RelatedDocument.objects.get_or_create( source=doc, target=refdoc[ 0 ], relationship=DocRelationshipName.objects.get( slug='ref%s' % refType ) ) + RelatedDocument.objects.get_or_create( + source=doc, + target=refdoc[0], + 
relationship=DocRelationshipName.objects.get( + slug="ref%s" % refType + ), + ) if unfound: - warnings.append('There were %d references with no matching Document'%len(unfound)) + warnings.append( + "There were %d references with no matching Document" % len(unfound) + ) ret = {} if errors: - ret['errors']=errors + ret["errors"] = errors if warnings: - ret['warnings']=warnings + ret["warnings"] = warnings if unfound: - ret['unfound']=list(unfound) + ret["unfound"] = list(unfound) return ret diff --git a/ietf/settings.py b/ietf/settings.py index 05eab0f12f..07d6f64b89 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -809,6 +809,7 @@ def skip_unreadable_post(record): "polls", "procmaterials", "review", + "rfc", "slides", "staging", "statchg", diff --git a/ietf/sync/rfceditor.py b/ietf/sync/rfceditor.py index b3234a87e2..fe89e956c3 100644 --- a/ietf/sync/rfceditor.py +++ b/ietf/sync/rfceditor.py @@ -468,7 +468,8 @@ def update_docs_from_rfc_index( doc.set_state(rfc_published_state) if draft: doc.formal_languages.set(draft.formal_languages.all()) - for author in draft.documentauthor_set.all(): + # This is known broken - it leaves RFCs with the authors of the draft that became the RFC + for author in draft.documentauthor_set.all(): # Copy the author but point at the new doc. 
# See https://docs.djangoproject.com/en/4.2/topics/db/queries/#copying-model-instances author.pk = None diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index e4174d3729..7d79dde722 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -4,6 +4,10 @@ # import datetime import io +from pathlib import Path +import subprocess +from tempfile import NamedTemporaryFile, TemporaryDirectory +from typing import List import requests from celery import shared_task @@ -12,6 +16,8 @@ from django.utils import timezone from ietf.doc.models import DocEvent, RelatedDocument +from ietf.doc.storage_utils import AlreadyExistsError, store_bytes +from ietf.doc.tasks import rebuild_reference_relations_task from ietf.sync import iana from ietf.sync import rfceditor from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue @@ -65,11 +71,15 @@ def rfc_editor_index_update_task(full_index=False): if len(errata_data) < rfceditor.MIN_ERRATA_RESULTS: log.log("Not enough errata entries, only %s" % len(errata_data)) return # failed + newly_published = {} for rfc_number, changes, doc, rfc_published in rfceditor.update_docs_from_rfc_index( index_data, errata_data, skip_older_than_date=skip_date ): for c in changes: log.log("RFC%s, %s: %s" % (rfc_number, doc.name, c)) + if rfc_published: + newly_published.add(rfc_number) + rsync_rfcs_from_rfceditor.delay(newly_published) @shared_task @@ -222,3 +232,80 @@ def fix_subseries_docevents_task(): DocEvent.objects.filter(type="sync_from_rfc_editor", desc=desc).update( time=obsoleting_time ) + +@shared_task +def rsync_rfcs_from_rfceditor(rfc_numbers: List[int]): + log.log("Rsyncing rfcs from rfc-editor: " + str(rfc_numbers)) + from_file = None + with NamedTemporaryFile(mode="w", delete=False) as fp: + from_file = Path(fp.name) + for num in rfc_numbers: + for ext in settings.RFC_FILE_TYPES: + fp.write(f"rfc{num}.{ext}\n") + fp.close() + subprocess.run( + [ + "/usr/bin/rsync", + "-a", + "--ignore-existing", + 
f"--include-from={str(from_file)}", + "--exclude=*", + "rsync.rfc-editor.org::rfcs/", + f"{settings.RFC_PATH}", + ] + ) + if from_file is not None: + from_file.unlink() + for num in rfc_numbers: + for ext in settings.RFC_FILE_TYPES: + fs_path = Path(settings.RFC_PATH) / f"rfc{num}.{ext}" + if fs_path.is_file(): + with fs_path.open("rb") as f: + bytes = f.read() + m_time = fs_path.stat().st_mtime + try: + store_bytes( + kind="rfc", + name=f"{ext}/rfc{num}.{ext}", + content=bytes, + allow_overwrite=False, # Intentionally not allowing overwrite. + doc_name=f"rfc{num}", + doc_rev=None, + # Not setting content_type + mtime=datetime.datetime.fromtimestamp(m_time, tz=datetime.UTC), + ) + except AlreadyExistsError as e: + log.log(str(e)) + # This condition will just log verbosely but not otherwise fail + + # Also fetch and store the not-prepped xml + with TemporaryDirectory() as td: + name = f"rfc{num}.notprepped.xml" + subprocess.run( + [ + "/usr/bin/rsync", + "-a", + f"rsync.rfc-editor.org::rfcs/prerelease/{name}", + f"{td}/", + ] + ) + source = Path(td)/name + with open(source,"rb") as f: + bytes = f.read() + m_time = source.stat().st_mtime + try: + store_bytes( + kind="rfc", + name=f"notprepped/{name}", + content=bytes, + allow_overwrite=False, # Intentionally not allowing overwrite. + doc_name=f"rfc{num}", + doc_rev=None, + # Not setting content_type + mtime=datetime.datetime.fromtimestamp(m_time, tz=datetime.UTC), + ) + except AlreadyExistsError as e: + log.log(str(e)) + + rebuild_reference_relations_task.delay([f"rfc{num}" for num in rfc_numbers]) + From 817ed547d59a50c8133f555d06a1665d9ec1fa5e Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Tue, 13 Jan 2026 16:22:40 -0600 Subject: [PATCH 02/25] fix: isolate subprocess. 
Guard against missing file --- ietf/sync/tasks.py | 41 ++++++++++++++++++++++------------------- ietf/sync/utils.py | 7 +++++++ 2 files changed, 29 insertions(+), 19 deletions(-) create mode 100644 ietf/sync/utils.py diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 7d79dde722..ffb94e3fa0 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -5,7 +5,6 @@ import datetime import io from pathlib import Path -import subprocess from tempfile import NamedTemporaryFile, TemporaryDirectory from typing import List import requests @@ -21,6 +20,7 @@ from ietf.sync import iana from ietf.sync import rfceditor from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue +from ietf.sync.utils import rsync_helper from ietf.utils import log from ietf.utils.timezone import date_today @@ -243,7 +243,7 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: List[int]): for ext in settings.RFC_FILE_TYPES: fp.write(f"rfc{num}.{ext}\n") fp.close() - subprocess.run( + rsync_helper( [ "/usr/bin/rsync", "-a", @@ -281,7 +281,7 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: List[int]): # Also fetch and store the not-prepped xml with TemporaryDirectory() as td: name = f"rfc{num}.notprepped.xml" - subprocess.run( + rsync_helper( [ "/usr/bin/rsync", "-a", @@ -290,22 +290,25 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: List[int]): ] ) source = Path(td)/name - with open(source,"rb") as f: - bytes = f.read() - m_time = source.stat().st_mtime - try: - store_bytes( - kind="rfc", - name=f"notprepped/{name}", - content=bytes, - allow_overwrite=False, # Intentionally not allowing overwrite. 
- doc_name=f"rfc{num}", - doc_rev=None, - # Not setting content_type - mtime=datetime.datetime.fromtimestamp(m_time, tz=datetime.UTC), - ) - except AlreadyExistsError as e: - log.log(str(e)) + if source.is_file(): + with open(source,"rb") as f: + bytes = f.read() + m_time = source.stat().st_mtime + try: + store_bytes( + kind="rfc", + name=f"notprepped/{name}", + content=bytes, + allow_overwrite=False, # Intentionally not allowing overwrite. + doc_name=f"rfc{num}", + doc_rev=None, + # Not setting content_type + mtime=datetime.datetime.fromtimestamp(m_time, tz=datetime.UTC), + ) + except AlreadyExistsError as e: + log.log(str(e)) + else: + log.log(f"No content for {name} found.") rebuild_reference_relations_task.delay([f"rfc{num}" for num in rfc_numbers]) diff --git a/ietf/sync/utils.py b/ietf/sync/utils.py new file mode 100644 index 0000000000..0e7cc93e54 --- /dev/null +++ b/ietf/sync/utils.py @@ -0,0 +1,7 @@ +# Copyright The IETF Trust 2026, All Rights Reserved + +import subprocess +from typing import List + +def rsync_helper(subprocess_arg_array:List[str]): + subprocess.run(subprocess_arg_array) From 59345b140578d6da266709a18dfa045b54cb8f5d Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Tue, 13 Jan 2026 17:21:37 -0600 Subject: [PATCH 03/25] fix: correct variable initialization. 
guard against unnecessary call --- ietf/sync/tasks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index ffb94e3fa0..86047ea190 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -71,7 +71,7 @@ def rfc_editor_index_update_task(full_index=False): if len(errata_data) < rfceditor.MIN_ERRATA_RESULTS: log.log("Not enough errata entries, only %s" % len(errata_data)) return # failed - newly_published = {} + newly_published = set() for rfc_number, changes, doc, rfc_published in rfceditor.update_docs_from_rfc_index( index_data, errata_data, skip_older_than_date=skip_date ): @@ -79,7 +79,8 @@ def rfc_editor_index_update_task(full_index=False): log.log("RFC%s, %s: %s" % (rfc_number, doc.name, c)) if rfc_published: newly_published.add(rfc_number) - rsync_rfcs_from_rfceditor.delay(newly_published) + if len(newly_published) > 0: + rsync_rfcs_from_rfceditor.delay(list(newly_published)) @shared_task From 2477d21847bd6c88c65926e8c7ea4a610b428383 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Tue, 13 Jan 2026 17:22:21 -0600 Subject: [PATCH 04/25] test: mock rsync task calls --- ietf/sync/tests.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index 3432f6214a..c1e129b9a8 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -889,8 +889,9 @@ class TaskTests(TestCase): @mock.patch("ietf.sync.tasks.rfceditor.update_docs_from_rfc_index") @mock.patch("ietf.sync.tasks.rfceditor.parse_index") @mock.patch("ietf.sync.tasks.requests.get") + @mock.patch("ietf.sync.tasks.rsync_rfcs_from_rfceditor.delay") def test_rfc_editor_index_update_task( - self, requests_get_mock, parse_index_mock, update_docs_mock + self, rsync_task_mock, requests_get_mock, parse_index_mock, update_docs_mock ) -> None: # the annotation here prevents mypy from complaining about annotation-unchecked """rfc_editor_index_update_task calls helpers 
correctly @@ -922,6 +923,7 @@ def json(self): rfc = RfcFactory() # Test with full_index = False + rsync_task_mock.return_value = None requests_get_mock.side_effect = (index_response, errata_response) # will step through these parse_index_mock.return_value = MockIndexData(length=rfceditor.MIN_INDEX_RESULTS) update_docs_mock.return_value = ( @@ -947,10 +949,13 @@ def json(self): ) self.assertIsNotNone(update_docs_kwargs["skip_older_than_date"]) + self.assertFalse(rsync_task_mock.called) + # Test again with full_index = True requests_get_mock.reset_mock() parse_index_mock.reset_mock() update_docs_mock.reset_mock() + rsync_task_mock.reset_mock() requests_get_mock.side_effect = (index_response, errata_response) # will step through these tasks.rfc_editor_index_update_task(full_index=True) @@ -971,40 +976,64 @@ def json(self): ) self.assertIsNone(update_docs_kwargs["skip_older_than_date"]) + self.assertFalse(rsync_task_mock.called) + + # Test again where the index would cause a new RFC to come into existence + requests_get_mock.reset_mock() + parse_index_mock.reset_mock() + update_docs_mock.reset_mock() + rsync_task_mock.reset_mock() + requests_get_mock.side_effect = (index_response, errata_response) # will step through these + update_docs_mock.return_value = ( + (rfc.rfc_number, ("something changed",), rfc, True), + ) + tasks.rfc_editor_index_update_task(full_index=True) + self.assertTrue(rsync_task_mock.called) + rsync_task_args, rsync_task_kwargs = rsync_task_mock.call_args + self.assertEqual((([1000],),{}),(rsync_task_args, rsync_task_kwargs)) + # Test error handling requests_get_mock.reset_mock() parse_index_mock.reset_mock() update_docs_mock.reset_mock() + rsync_task_mock.reset_mock() requests_get_mock.side_effect = requests.Timeout # timeout on every get() tasks.rfc_editor_index_update_task(full_index=False) self.assertFalse(parse_index_mock.called) self.assertFalse(update_docs_mock.called) + self.assertFalse(rsync_task_mock.called) requests_get_mock.reset_mock() 
parse_index_mock.reset_mock() update_docs_mock.reset_mock() + rsync_task_mock.reset_mock() requests_get_mock.side_effect = [index_response, requests.Timeout] # timeout second get() tasks.rfc_editor_index_update_task(full_index=False) self.assertFalse(update_docs_mock.called) + self.assertFalse(rsync_task_mock.called) requests_get_mock.reset_mock() parse_index_mock.reset_mock() update_docs_mock.reset_mock() + rsync_task_mock.reset_mock() requests_get_mock.side_effect = [index_response, errata_response] # feed in an index that is too short parse_index_mock.return_value = MockIndexData(length=rfceditor.MIN_INDEX_RESULTS - 1) tasks.rfc_editor_index_update_task(full_index=False) self.assertTrue(parse_index_mock.called) self.assertFalse(update_docs_mock.called) + self.assertFalse(rsync_task_mock.called) requests_get_mock.reset_mock() parse_index_mock.reset_mock() update_docs_mock.reset_mock() + rsync_task_mock.reset_mock() requests_get_mock.side_effect = [index_response, errata_response] errata_response.json_length = rfceditor.MIN_ERRATA_RESULTS - 1 # too short parse_index_mock.return_value = MockIndexData(length=rfceditor.MIN_INDEX_RESULTS) tasks.rfc_editor_index_update_task(full_index=False) self.assertFalse(update_docs_mock.called) + self.assertFalse(rsync_task_mock.called) @override_settings(RFC_EDITOR_QUEUE_URL="https://rfc-editor.example.com/queue/") @mock.patch("ietf.sync.tasks.update_drafts_from_queue") From b944af5d97dbf2fa3605b6eb826cf2bf6df37412 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 09:50:08 -0600 Subject: [PATCH 05/25] fix: use list for typing rather than List --- ietf/doc/tasks.py | 3 +-- ietf/sync/tasks.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ietf/doc/tasks.py b/ietf/doc/tasks.py index 41f191fcc9..a87f753753 100644 --- a/ietf/doc/tasks.py +++ b/ietf/doc/tasks.py @@ -3,7 +3,6 @@ # Celery task definitions # import datetime -from typing import List import debug # pyflakes:ignore from celery import 
shared_task @@ -132,7 +131,7 @@ def investigate_fragment_task(name_fragment: str): } @shared_task -def rebuild_reference_relations_task(doc_names:List[str]): +def rebuild_reference_relations_task(doc_names:list[str]): log.log("Task: Rebuilding reference relations for"+str(doc_names)) for doc in Document.objects.filter(name__in=doc_names, type__in=["rfc", "draft"]): filenames = dict() diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 86047ea190..e8127a905c 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -6,7 +6,6 @@ import io from pathlib import Path from tempfile import NamedTemporaryFile, TemporaryDirectory -from typing import List import requests from celery import shared_task @@ -235,7 +234,7 @@ def fix_subseries_docevents_task(): ) @shared_task -def rsync_rfcs_from_rfceditor(rfc_numbers: List[int]): +def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): log.log("Rsyncing rfcs from rfc-editor: " + str(rfc_numbers)) from_file = None with NamedTemporaryFile(mode="w", delete=False) as fp: From e76b6fbef6943c300dcc0680dc3e206a753b2c7c Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 09:57:21 -0600 Subject: [PATCH 06/25] fix: string formatting --- ietf/doc/tasks.py | 2 +- ietf/doc/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ietf/doc/tasks.py b/ietf/doc/tasks.py index a87f753753..651fff435b 100644 --- a/ietf/doc/tasks.py +++ b/ietf/doc/tasks.py @@ -132,7 +132,7 @@ def investigate_fragment_task(name_fragment: str): @shared_task def rebuild_reference_relations_task(doc_names:list[str]): - log.log("Task: Rebuilding reference relations for"+str(doc_names)) + log.log("Task: Rebuilding reference relations for {doc_names}") for doc in Document.objects.filter(name__in=doc_names, type__in=["rfc", "draft"]): filenames = dict() base = ( diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index e65ac30187..b35ba987e0 100644 --- a/ietf/doc/utils.py +++ b/ietf/doc/utils.py @@ -829,7 +829,7 @@ def 
rebuild_reference_relations(doc, filenames): try: refs = draft.PlaintextDraft.from_file(filename).get_refs() except IOError as e: - return {"errors": ["%s :%s" % (e.strerror, filename)]} + return {"errors": [f"{e.strerror}: {filename}")]} else: return { "errors": [ From 350cbff819914707f9bb5add581f1fca764d79f0 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 09:59:03 -0600 Subject: [PATCH 07/25] fix: generalize error string when there are no files to parse --- ietf/doc/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index b35ba987e0..6a1a44167a 100644 --- a/ietf/doc/utils.py +++ b/ietf/doc/utils.py @@ -833,7 +833,7 @@ def rebuild_reference_relations(doc, filenames): else: return { "errors": [ - "No Internet-Draft text available for rebuilding reference relations. Need XML or plaintext." + "No file available for rebuilding reference relations. Need XML or plaintext." ] } From 50f7b46b2fce88734af7e5e6f9ae2dcc067fc6bc Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 10:02:14 -0600 Subject: [PATCH 08/25] fix: use delete_on_close with NamedTemporaryFile --- ietf/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index e8127a905c..0363960871 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -237,7 +237,7 @@ def fix_subseries_docevents_task(): def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): log.log("Rsyncing rfcs from rfc-editor: " + str(rfc_numbers)) from_file = None - with NamedTemporaryFile(mode="w", delete=False) as fp: + with NamedTemporaryFile(mode="w", delete_on_close=False) as fp: from_file = Path(fp.name) for num in rfc_numbers: for ext in settings.RFC_FILE_TYPES: From 98922b875eeaaf03ea4ebffc6e6d6b0cd859ef27 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 10:04:25 -0600 Subject: [PATCH 09/25] fix: mtime is less distracting than m_time --- ietf/sync/tasks.py 
| 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 0363960871..fefd7df528 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -262,7 +262,7 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): if fs_path.is_file(): with fs_path.open("rb") as f: bytes = f.read() - m_time = fs_path.stat().st_mtime + mtime = fs_path.stat().st_mtime try: store_bytes( kind="rfc", @@ -272,7 +272,7 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): doc_name=f"rfc{num}", doc_rev=None, # Not setting content_type - mtime=datetime.datetime.fromtimestamp(m_time, tz=datetime.UTC), + mtime=datetime.datetime.fromtimestamp(mtime, tz=datetime.UTC), ) except AlreadyExistsError as e: log.log(str(e)) @@ -293,7 +293,7 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): if source.is_file(): with open(source,"rb") as f: bytes = f.read() - m_time = source.stat().st_mtime + mtime = source.stat().st_mtime try: store_bytes( kind="rfc", @@ -303,7 +303,7 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): doc_name=f"rfc{num}", doc_rev=None, # Not setting content_type - mtime=datetime.datetime.fromtimestamp(m_time, tz=datetime.UTC), + mtime=datetime.datetime.fromtimestamp(mtime, tz=datetime.UTC), ) except AlreadyExistsError as e: log.log(str(e)) From d398c9f7571807c48f6c3c634edcfcf00d4b1fa5 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 10:17:00 -0600 Subject: [PATCH 10/25] fix: store the notprepped file on the fs --- ietf/sync/tasks.py | 60 +++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index fefd7df528..8d1b8b730a 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -279,36 +279,36 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): # This condition will just log verbosely but not otherwise fail # Also fetch and store the not-prepped xml - with TemporaryDirectory() as td: - 
name = f"rfc{num}.notprepped.xml" - rsync_helper( - [ - "/usr/bin/rsync", - "-a", - f"rsync.rfc-editor.org::rfcs/prerelease/{name}", - f"{td}/", - ] - ) - source = Path(td)/name - if source.is_file(): - with open(source,"rb") as f: - bytes = f.read() - mtime = source.stat().st_mtime - try: - store_bytes( - kind="rfc", - name=f"notprepped/{name}", - content=bytes, - allow_overwrite=False, # Intentionally not allowing overwrite. - doc_name=f"rfc{num}", - doc_rev=None, - # Not setting content_type - mtime=datetime.datetime.fromtimestamp(mtime, tz=datetime.UTC), - ) - except AlreadyExistsError as e: - log.log(str(e)) - else: - log.log(f"No content for {name} found.") + name = f"rfc{num}.notprepped.xml" + fs_dest = Path(settings.RFC_PATH) / "prerelease" + rsync_helper( + [ + "/usr/bin/rsync", + "-a", + f"rsync.rfc-editor.org::rfcs/prerelease/{name}", + f"{fs_dest}/", + ] + ) + source = fs_dest / name + if source.is_file(): + with open(source, "rb") as f: + bytes = f.read() + mtime = source.stat().st_mtime + try: + store_bytes( + kind="rfc", + name=f"notprepped/{name}", + content=bytes, + allow_overwrite=False, # Intentionally not allowing overwrite. 
+ doc_name=f"rfc{num}", + doc_rev=None, + # Not setting content_type + mtime=datetime.datetime.fromtimestamp(mtime, tz=datetime.UTC), + ) + except AlreadyExistsError as e: + log.log(str(e)) + else: + log.log(f"No content for {name} found.") rebuild_reference_relations_task.delay([f"rfc{num}" for num in rfc_numbers]) From cf8e25b7835b34bc58fc4053db6486f23704d35d Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 10:20:20 -0600 Subject: [PATCH 11/25] fix: typo --- ietf/doc/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index 6a1a44167a..d19d3e5ad3 100644 --- a/ietf/doc/utils.py +++ b/ietf/doc/utils.py @@ -829,7 +829,7 @@ def rebuild_reference_relations(doc, filenames): try: refs = draft.PlaintextDraft.from_file(filename).get_refs() except IOError as e: - return {"errors": [f"{e.strerror}: {filename}")]} + return {"errors": [f"{e.strerror}: {filename}"]} else: return { "errors": [ From 451d50f137ef6b160ade32724f6057782a16ab0a Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 10:26:11 -0600 Subject: [PATCH 12/25] fix: fetch json, remove unneeded unlink --- ietf/sync/tasks.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 8d1b8b730a..3eac0358bd 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -236,11 +236,12 @@ def fix_subseries_docevents_task(): @shared_task def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): log.log("Rsyncing rfcs from rfc-editor: " + str(rfc_numbers)) + types_to_sync = settings.RFC_FILE_TYPES + ("json",) from_file = None with NamedTemporaryFile(mode="w", delete_on_close=False) as fp: from_file = Path(fp.name) for num in rfc_numbers: - for ext in settings.RFC_FILE_TYPES: + for ext in types_to_sync: fp.write(f"rfc{num}.{ext}\n") fp.close() rsync_helper( @@ -254,10 +255,8 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): f"{settings.RFC_PATH}", ] ) - if 
from_file is not None: - from_file.unlink() for num in rfc_numbers: - for ext in settings.RFC_FILE_TYPES: + for ext in types_to_sync: fs_path = Path(settings.RFC_PATH) / f"rfc{num}.{ext}" if fs_path.is_file(): with fs_path.open("rb") as f: From 98a5995742fc7b9670725a3e0a8e58d85bdac34d Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 10:37:15 -0600 Subject: [PATCH 13/25] chore: ruff --- ietf/doc/tasks.py | 2 +- ietf/sync/tasks.py | 3 +-- ietf/sync/tests.py | 7 +++++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ietf/doc/tasks.py b/ietf/doc/tasks.py index 651fff435b..c8034f73be 100644 --- a/ietf/doc/tasks.py +++ b/ietf/doc/tasks.py @@ -131,7 +131,7 @@ def investigate_fragment_task(name_fragment: str): } @shared_task -def rebuild_reference_relations_task(doc_names:list[str]): +def rebuild_reference_relations_task(doc_names: list[str]): log.log("Task: Rebuilding reference relations for {doc_names}") for doc in Document.objects.filter(name__in=doc_names, type__in=["rfc", "draft"]): filenames = dict() diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 3eac0358bd..f0cefb851d 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -276,7 +276,7 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): except AlreadyExistsError as e: log.log(str(e)) # This condition will just log verbosely but not otherwise fail - + # Also fetch and store the not-prepped xml name = f"rfc{num}.notprepped.xml" fs_dest = Path(settings.RFC_PATH) / "prerelease" @@ -310,4 +310,3 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): log.log(f"No content for {name} found.") rebuild_reference_relations_task.delay([f"rfc{num}" for num in rfc_numbers]) - diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index c1e129b9a8..7d362f5c8f 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -983,14 +983,17 @@ def json(self): parse_index_mock.reset_mock() update_docs_mock.reset_mock() rsync_task_mock.reset_mock() - requests_get_mock.side_effect = 
(index_response, errata_response) # will step through these + requests_get_mock.side_effect = ( + index_response, + errata_response, + ) # will step through these update_docs_mock.return_value = ( (rfc.rfc_number, ("something changed",), rfc, True), ) tasks.rfc_editor_index_update_task(full_index=True) self.assertTrue(rsync_task_mock.called) rsync_task_args, rsync_task_kwargs = rsync_task_mock.call_args - self.assertEqual((([1000],),{}),(rsync_task_args, rsync_task_kwargs)) + self.assertEqual((([1000],), {}), (rsync_task_args, rsync_task_kwargs)) # Test error handling requests_get_mock.reset_mock() From eb9e458fd6e7d08add33c5af189a8942224dfddf Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 10:38:07 -0600 Subject: [PATCH 14/25] fix: use list for typing --- ietf/sync/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ietf/sync/utils.py b/ietf/sync/utils.py index 0e7cc93e54..52feac41af 100644 --- a/ietf/sync/utils.py +++ b/ietf/sync/utils.py @@ -1,7 +1,7 @@ # Copyright The IETF Trust 2026, All Rights Reserved import subprocess -from typing import List -def rsync_helper(subprocess_arg_array:List[str]): + +def rsync_helper(subprocess_arg_array: list[str]): subprocess.run(subprocess_arg_array) From bb123b54f197a7495f849ae725f02290c4e15b92 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 12:13:33 -0600 Subject: [PATCH 15/25] fix: typo --- ietf/doc/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ietf/doc/tasks.py b/ietf/doc/tasks.py index c8034f73be..02b7c2a07d 100644 --- a/ietf/doc/tasks.py +++ b/ietf/doc/tasks.py @@ -132,7 +132,7 @@ def investigate_fragment_task(name_fragment: str): @shared_task def rebuild_reference_relations_task(doc_names: list[str]): - log.log("Task: Rebuilding reference relations for {doc_names}") + log.log(f"Task: Rebuilding reference relations for {doc_names}") for doc in Document.objects.filter(name__in=doc_names, type__in=["rfc", "draft"]): filenames = 
dict() base = ( From 814b147922a0d5049f5c15b3d4b2d729bbc60a4e Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 12:51:11 -0600 Subject: [PATCH 16/25] feat: bulk load rfcs into blob storage --- ietf/sync/tasks.py | 80 ++++++++++++++-------------------------------- ietf/sync/utils.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 56 deletions(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index f0cefb851d..14f82fffa1 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -5,7 +5,7 @@ import datetime import io from pathlib import Path -from tempfile import NamedTemporaryFile, TemporaryDirectory +from tempfile import NamedTemporaryFile import requests from celery import shared_task @@ -14,12 +14,11 @@ from django.utils import timezone from ietf.doc.models import DocEvent, RelatedDocument -from ietf.doc.storage_utils import AlreadyExistsError, store_bytes from ietf.doc.tasks import rebuild_reference_relations_task from ietf.sync import iana from ietf.sync import rfceditor from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue -from ietf.sync.utils import rsync_helper +from ietf.sync.utils import load_rfcs_into_blobdb, rsync_helper from ietf.utils import log from ietf.utils.timezone import date_today @@ -240,9 +239,11 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): from_file = None with NamedTemporaryFile(mode="w", delete_on_close=False) as fp: from_file = Path(fp.name) + fp.write("prerelease/\n") for num in rfc_numbers: for ext in types_to_sync: fp.write(f"rfc{num}.{ext}\n") + fp.write(f"prerelease/rfc{num}.notprepped.xml\n") fp.close() rsync_helper( [ @@ -255,58 +256,25 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): f"{settings.RFC_PATH}", ] ) - for num in rfc_numbers: - for ext in types_to_sync: - fs_path = Path(settings.RFC_PATH) / f"rfc{num}.{ext}" - if fs_path.is_file(): - with fs_path.open("rb") as f: - bytes = f.read() - mtime = fs_path.stat().st_mtime - 
try: - store_bytes( - kind="rfc", - name=f"{ext}/rfc{num}.{ext}", - content=bytes, - allow_overwrite=False, # Intentionally not allowing overwrite. - doc_name=f"rfc{num}", - doc_rev=None, - # Not setting content_type - mtime=datetime.datetime.fromtimestamp(mtime, tz=datetime.UTC), - ) - except AlreadyExistsError as e: - log.log(str(e)) - # This condition will just log verbosely but not otherwise fail - - # Also fetch and store the not-prepped xml - name = f"rfc{num}.notprepped.xml" - fs_dest = Path(settings.RFC_PATH) / "prerelease" - rsync_helper( - [ - "/usr/bin/rsync", - "-a", - f"rsync.rfc-editor.org::rfcs/prerelease/{name}", - f"{fs_dest}/", - ] - ) - source = fs_dest / name - if source.is_file(): - with open(source, "rb") as f: - bytes = f.read() - mtime = source.stat().st_mtime - try: - store_bytes( - kind="rfc", - name=f"notprepped/{name}", - content=bytes, - allow_overwrite=False, # Intentionally not allowing overwrite. - doc_name=f"rfc{num}", - doc_rev=None, - # Not setting content_type - mtime=datetime.datetime.fromtimestamp(mtime, tz=datetime.UTC), - ) - except AlreadyExistsError as e: - log.log(str(e)) - else: - log.log(f"No content for {name} found.") + load_rfcs_into_blobdb(rfc_numbers) rebuild_reference_relations_task.delay([f"rfc{num}" for num in rfc_numbers]) + + +@shared_task +def load_rfcs_into_blobdb_task(start: int, end: int): + """Move file content for rfcs from rfc{start} to rfc{end} inclusive + + As this is expected to be removed once the blobdb is populated, it + will truncate its work to a coded max end. + This will not overwrite any existing blob content, and will only + log a small complaint if asked to load a non-existent RFC. 
+ """ + # Protect us from ourselves + if end < start: + return + if start < 1: + start = 1 + if end > 11000: # Arbitrarily chosen + end = 11000 + load_rfcs_into_blobdb(range(start, end + 1)) diff --git a/ietf/sync/utils.py b/ietf/sync/utils.py index 52feac41af..70bfcee5fb 100644 --- a/ietf/sync/utils.py +++ b/ietf/sync/utils.py @@ -1,7 +1,60 @@ # Copyright The IETF Trust 2026, All Rights Reserved +import datetime import subprocess +from pathlib import Path + +from django.conf import settings + +from ietf.utils import log +from ietf.doc.storage_utils import AlreadyExistsError, store_bytes + def rsync_helper(subprocess_arg_array: list[str]): subprocess.run(subprocess_arg_array) + + +def load_rfcs_into_blobdb(numbers: list[int]): + types_to_load = settings.RFC_FILE_TYPES + ("json",) + for num in numbers: + for ext in types_to_load: + fs_path = Path(settings.RFC_PATH) / f"rfc{num}.{ext}" + if fs_path.is_file(): + with fs_path.open("rb") as f: + bytes = f.read() + mtime = fs_path.stat().st_mtime + try: + store_bytes( + kind="rfc", + name=f"{ext}/rfc{num}.{ext}", + content=bytes, + allow_overwrite=False, # Intentionally not allowing overwrite. + doc_name=f"rfc{num}", + doc_rev=None, + # Not setting content_type + mtime=datetime.datetime.fromtimestamp(mtime, tz=datetime.UTC), + ) + except AlreadyExistsError as e: + log.log(str(e)) + + # store the not-prepped xml + name = f"rfc{num}.notprepped.xml" + source = Path(settings.RFC_PATH) / "prerelease" / name + if source.is_file(): + with open(source, "rb") as f: + bytes = f.read() + mtime = source.stat().st_mtime + try: + store_bytes( + kind="rfc", + name=f"notprepped/{name}", + content=bytes, + allow_overwrite=False, # Intentionally not allowing overwrite. 
+ doc_name=f"rfc{num}", + doc_rev=None, + # Not setting content_type + mtime=datetime.datetime.fromtimestamp(mtime, tz=datetime.UTC), + ) + except AlreadyExistsError as e: + log.log(str(e)) From cb023adefffa0b9f43c77945d68faade460105dd Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 13:40:39 -0600 Subject: [PATCH 17/25] fix: restrict the rsync_helper to rsync --- ietf/sync/tasks.py | 1 - ietf/sync/utils.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 14f82fffa1..684c672b24 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -247,7 +247,6 @@ def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): fp.close() rsync_helper( [ - "/usr/bin/rsync", "-a", "--ignore-existing", f"--include-from={str(from_file)}", diff --git a/ietf/sync/utils.py b/ietf/sync/utils.py index 70bfcee5fb..437d72d7d9 100644 --- a/ietf/sync/utils.py +++ b/ietf/sync/utils.py @@ -12,7 +12,7 @@ def rsync_helper(subprocess_arg_array: list[str]): - subprocess.run(subprocess_arg_array) + subprocess.run(["/usr/bin/rsync"]+subprocess_arg_array) def load_rfcs_into_blobdb(numbers: list[int]): From 4a45d4fd04130fb6cb72300e0c01f5cad405e829 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 14:35:04 -0600 Subject: [PATCH 18/25] test: test ietf.sync.utils --- ietf/sync/tests_utils.py | 60 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 ietf/sync/tests_utils.py diff --git a/ietf/sync/tests_utils.py b/ietf/sync/tests_utils.py new file mode 100644 index 0000000000..8b0dd9eea7 --- /dev/null +++ b/ietf/sync/tests_utils.py @@ -0,0 +1,60 @@ +# Copyright The IETF Trust 2026, All Rights Reserved + +from pathlib import Path +from tempfile import TemporaryDirectory + +from django.test import override_settings +from ietf import settings +from ietf.doc.storage_utils import exists_in_storage, retrieve_str +from ietf.sync.utils import load_rfcs_into_blobdb, rsync_helper 
+from ietf.utils.test_utils import TestCase + + +class RsyncHelperTests(TestCase): + def test_rsync_helper(self): + with ( + TemporaryDirectory() as source_dir, + TemporaryDirectory() as dest_dir, + ): + with (Path(source_dir) / "canary.txt").open("w") as canary_source_file: + canary_source_file.write("chirp") + rsync_helper( + [ + "-a", + f"{source_dir}/", + f"{dest_dir}/", + ] + ) + with (Path(dest_dir) / "canary.txt").open("r") as canary_dest_file: + chirp = canary_dest_file.read() + self.assertEqual(chirp, "chirp") + + +class RfcBlobUploadTests(TestCase): + def test_load_rfcs_into_blobdb(self): + with TemporaryDirectory() as faux_rfc_path: + with override_settings(RFC_PATH=faux_rfc_path): + rfc_path = Path(faux_rfc_path) + (rfc_path / "prerelease").mkdir() + for num in [12345, 54321]: + for ext in settings.RFC_FILE_TYPES + ("json",): + with (rfc_path / f"rfc{num}.{ext}").open("w") as f: + f.write(ext) + with (rfc_path / "rfc{num}.bogon").open("w") as f: + f.write("bogon") + with (rfc_path / "prerelease" / f"rfc{num}.notprepped.xml").open( + "w" + ) as f: + f.write("notprepped") + load_rfcs_into_blobdb([12345, 54321]) + for num in [12345, 54321]: + for ext in settings.RFC_FILE_TYPES + ("json",): + self.assertEqual( + retrieve_str("rfc", f"{ext}/rfc{num}.{ext}"), + ext, + ) + self.assertFalse(exists_in_storage("rfc", f"bogon/rfc{num}.bogon")) + self.assertEqual( + retrieve_str("rfc", f"notprepped/rfc{num}.notprepped.xml"), + "notprepped", + ) From df0afbf9a0826aa8a165e32f09ad6840f0a7c2f7 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 16:16:02 -0600 Subject: [PATCH 19/25] chore: honor typing choices --- ietf/sync/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 684c672b24..597ef2c186 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -276,4 +276,4 @@ def load_rfcs_into_blobdb_task(start: int, end: int): start = 1 if end > 11000: # Arbitrarily chosen end = 11000 - 
load_rfcs_into_blobdb(range(start, end + 1)) + load_rfcs_into_blobdb(list(range(start, end + 1))) From 1f7ba5e04c5fcf1fcd1bd8859adfcebe0ccfd8d7 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 16:16:24 -0600 Subject: [PATCH 20/25] test: sync task tests --- ietf/sync/tests.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index 7d362f5c8f..267c484ce5 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -1166,3 +1166,51 @@ def test_iana_protocols_update_task( self.assertTrue(requests_get_mock.called) self.assertFalse(parse_protocols_mock.called) self.assertFalse(update_rfc_log_mock.called) + + @mock.patch("ietf.sync.tasks.rsync_helper") + @mock.patch("ietf.sync.tasks.load_rfcs_into_blobdb") + @mock.patch("ietf.sync.tasks.rebuild_reference_relations_task.delay") + def test_rsync_rfcs_from_rfceditor( + self, + rebuild_relations_mock, + load_blobs_mock, + rsync_helper_mock, + ): + tasks.rsync_rfcs_from_rfceditor([12345,54321]) + self.assertTrue(rsync_helper_mock.called) + self.assertTrue(load_blobs_mock.called) + load_blobs_args, load_blobs_kwargs = load_blobs_mock.call_args + self.assertEqual(load_blobs_args,([12345, 54321],)) + self.assertEqual(load_blobs_kwargs,{}) + self.assertTrue(rebuild_relations_mock.called) + rebuild_args, rebuild_kwargs = rebuild_relations_mock.call_args + self.assertEqual(rebuild_args, (["rfc12345", "rfc54321"],)) + self.assertEqual(rebuild_kwargs, {}) + + @mock.patch("ietf.sync.tasks.load_rfcs_into_blobdb") + def test_load_rfcs_into_blobdb_task( + self, + load_blobs_mock, + ): + tasks.load_rfcs_into_blobdb_task(5,3) + self.assertFalse(load_blobs_mock.called) + load_blobs_mock.reset_mock() + tasks.load_rfcs_into_blobdb_task(-1,1) + self.assertTrue(load_blobs_mock.called) + mock_args, mock_kwargs = load_blobs_mock.call_args + self.assertEqual(mock_args, ([1],)) + self.assertEqual(mock_kwargs, {}) + load_blobs_mock.reset_mock() + 
tasks.load_rfcs_into_blobdb_task(10999,50000) + self.assertTrue(load_blobs_mock.called) + mock_args, mock_kwargs = load_blobs_mock.call_args + self.assertEqual(mock_args, ([10999,11000],)) + self.assertEqual(mock_kwargs, {}) + load_blobs_mock.reset_mock() + tasks.load_rfcs_into_blobdb_task(3261,3263) + self.assertTrue(load_blobs_mock.called) + mock_args, mock_kwargs = load_blobs_mock.call_args + self.assertEqual(mock_args, ([3261, 3262, 3263],)) + self.assertEqual(mock_kwargs, {}) + + From 897fb33ccde9eaf085a3bd9a5cdefae55d1adccd Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 16:34:40 -0600 Subject: [PATCH 21/25] refactor: isolate the rsync from-file construction and test it --- ietf/sync/tasks.py | 15 +++++---------- ietf/sync/tests_utils.py | 24 +++++++++++++++++++++++- ietf/sync/utils.py | 9 +++++++++ 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 597ef2c186..d1492875e0 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -18,7 +18,7 @@ from ietf.sync import iana from ietf.sync import rfceditor from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue -from ietf.sync.utils import load_rfcs_into_blobdb, rsync_helper +from ietf.sync.utils import build_from_file_content, load_rfcs_into_blobdb, rsync_helper from ietf.utils import log from ietf.utils.timezone import date_today @@ -234,22 +234,17 @@ def fix_subseries_docevents_task(): @shared_task def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): - log.log("Rsyncing rfcs from rfc-editor: " + str(rfc_numbers)) - types_to_sync = settings.RFC_FILE_TYPES + ("json",) + log.log(f"Rsyncing rfcs from rfc-editor: {rfc_numbers}") from_file = None with NamedTemporaryFile(mode="w", delete_on_close=False) as fp: - from_file = Path(fp.name) - fp.write("prerelease/\n") - for num in rfc_numbers: - for ext in types_to_sync: - fp.write(f"rfc{num}.{ext}\n") - fp.write(f"prerelease/rfc{num}.notprepped.xml\n") + 
fp.write(build_from_file_content(rfc_numbers)) fp.close() + from_file = Path(fp.name) rsync_helper( [ "-a", "--ignore-existing", - f"--include-from={str(from_file)}", + f"--include-from={from_file}", "--exclude=*", "rsync.rfc-editor.org::rfcs/", f"{settings.RFC_PATH}", diff --git a/ietf/sync/tests_utils.py b/ietf/sync/tests_utils.py index 8b0dd9eea7..eb4b4ddf74 100644 --- a/ietf/sync/tests_utils.py +++ b/ietf/sync/tests_utils.py @@ -6,7 +6,7 @@ from django.test import override_settings from ietf import settings from ietf.doc.storage_utils import exists_in_storage, retrieve_str -from ietf.sync.utils import load_rfcs_into_blobdb, rsync_helper +from ietf.sync.utils import build_from_file_content, load_rfcs_into_blobdb, rsync_helper from ietf.utils.test_utils import TestCase @@ -29,6 +29,28 @@ def test_rsync_helper(self): chirp = canary_dest_file.read() self.assertEqual(chirp, "chirp") + def test_build_from_file_content(self): + content = build_from_file_content([12345, 54321]) + self.assertEqual( + content, + """prerelease/ +rfc12345.txt +rfc12345.html +rfc12345.xml +rfc12345.pdf +rfc12345.ps +rfc12345.json +prerelease/rfc12345.notprepped.xml +rfc54321.txt +rfc54321.html +rfc54321.xml +rfc54321.pdf +rfc54321.ps +rfc54321.json +prerelease/rfc54321.notprepped.xml +""", + ) + class RfcBlobUploadTests(TestCase): def test_load_rfcs_into_blobdb(self): diff --git a/ietf/sync/utils.py b/ietf/sync/utils.py index 437d72d7d9..5b5f8ff559 100644 --- a/ietf/sync/utils.py +++ b/ietf/sync/utils.py @@ -14,6 +14,15 @@ def rsync_helper(subprocess_arg_array: list[str]): subprocess.run(["/usr/bin/rsync"]+subprocess_arg_array) +def build_from_file_content(rfc_numbers: list[int]) -> str: + types_to_sync = settings.RFC_FILE_TYPES + ("json",) + lines = [] + lines.append("prerelease/") + for num in rfc_numbers: + for ext in types_to_sync: + lines.append(f"rfc{num}.{ext}") + lines.append(f"prerelease/rfc{num}.notprepped.xml") + return "\n".join(lines)+"\n" def load_rfcs_into_blobdb(numbers: 
list[int]): types_to_load = settings.RFC_FILE_TYPES + ("json",) From f1f89ca4da7af491aa956a146924d8cf7bf8fba2 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 16:36:10 -0600 Subject: [PATCH 22/25] chore: ruff --- ietf/sync/tests.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index 267c484ce5..6d93dbaa75 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -1176,12 +1176,12 @@ def test_rsync_rfcs_from_rfceditor( load_blobs_mock, rsync_helper_mock, ): - tasks.rsync_rfcs_from_rfceditor([12345,54321]) + tasks.rsync_rfcs_from_rfceditor([12345, 54321]) self.assertTrue(rsync_helper_mock.called) self.assertTrue(load_blobs_mock.called) load_blobs_args, load_blobs_kwargs = load_blobs_mock.call_args - self.assertEqual(load_blobs_args,([12345, 54321],)) - self.assertEqual(load_blobs_kwargs,{}) + self.assertEqual(load_blobs_args, ([12345, 54321],)) + self.assertEqual(load_blobs_kwargs, {}) self.assertTrue(rebuild_relations_mock.called) rebuild_args, rebuild_kwargs = rebuild_relations_mock.call_args self.assertEqual(rebuild_args, (["rfc12345", "rfc54321"],)) @@ -1192,22 +1192,22 @@ def test_load_rfcs_into_blobdb_task( self, load_blobs_mock, ): - tasks.load_rfcs_into_blobdb_task(5,3) + tasks.load_rfcs_into_blobdb_task(5, 3) self.assertFalse(load_blobs_mock.called) load_blobs_mock.reset_mock() - tasks.load_rfcs_into_blobdb_task(-1,1) + tasks.load_rfcs_into_blobdb_task(-1, 1) self.assertTrue(load_blobs_mock.called) mock_args, mock_kwargs = load_blobs_mock.call_args self.assertEqual(mock_args, ([1],)) self.assertEqual(mock_kwargs, {}) load_blobs_mock.reset_mock() - tasks.load_rfcs_into_blobdb_task(10999,50000) + tasks.load_rfcs_into_blobdb_task(10999, 50000) self.assertTrue(load_blobs_mock.called) mock_args, mock_kwargs = load_blobs_mock.call_args - self.assertEqual(mock_args, ([10999,11000],)) + self.assertEqual(mock_args, ([10999, 11000],)) self.assertEqual(mock_kwargs, {}) 
load_blobs_mock.reset_mock() - tasks.load_rfcs_into_blobdb_task(3261,3263) + tasks.load_rfcs_into_blobdb_task(3261, 3263) self.assertTrue(load_blobs_mock.called) mock_args, mock_kwargs = load_blobs_mock.call_args self.assertEqual(mock_args, ([3261, 3262, 3263],)) From 22c71696b836afd48791f7724a6a395eb6600e58 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 17:19:55 -0600 Subject: [PATCH 23/25] fix: reflect current changes in older test --- ietf/doc/tests_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ietf/doc/tests_utils.py b/ietf/doc/tests_utils.py index ef71f6ae6e..a2784bc85e 100644 --- a/ietf/doc/tests_utils.py +++ b/ietf/doc/tests_utils.py @@ -389,13 +389,13 @@ def test_requires_txt_or_xml(self): result = rebuild_reference_relations(self.doc, {}) self.assertCountEqual(result.keys(), ['errors']) self.assertEqual(len(result['errors']), 1) - self.assertIn('No Internet-Draft text available', result['errors'][0], + self.assertIn('No file available', result['errors'][0], 'Error should be reported if no Internet-Draft file is given') result = rebuild_reference_relations(self.doc, {'md': 'cant-do-this.md'}) self.assertCountEqual(result.keys(), ['errors']) self.assertEqual(len(result['errors']), 1) - self.assertIn('No Internet-Draft text available', result['errors'][0], + self.assertIn('No file available', result['errors'][0], 'Error should be reported if no XML or plaintext file is given') @patch.object(XMLDraft, 'get_refs') From 09324c9eea46e47ea13a0f00440ed3bea523cbb6 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 14 Jan 2026 17:22:22 -0600 Subject: [PATCH 24/25] fix: address incorrect test assumption --- ietf/sync/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index 87cf1de7e4..d759025973 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -993,7 +993,7 @@ def json(self): tasks.rfc_editor_index_update_task(full_index=True) 
self.assertTrue(rsync_task_mock.called) rsync_task_args, rsync_task_kwargs = rsync_task_mock.call_args - self.assertEqual((([1000],), {}), (rsync_task_args, rsync_task_kwargs)) + self.assertEqual((([rfc.rfc_number],), {}), (rsync_task_args, rsync_task_kwargs)) # Test error handling requests_get_mock.reset_mock() From a7fe0f9ef3c238b74a6cd3152e2f716076ed04f7 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Thu, 15 Jan 2026 09:52:21 -0600 Subject: [PATCH 25/25] chore: adhere to task naming conventions --- ietf/sync/tasks.py | 4 ++-- ietf/sync/tests.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index d1492875e0..fc75a056ed 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -78,7 +78,7 @@ def rfc_editor_index_update_task(full_index=False): if rfc_published: newly_published.add(rfc_number) if len(newly_published) > 0: - rsync_rfcs_from_rfceditor.delay(list(newly_published)) + rsync_rfcs_from_rfceditor_task.delay(list(newly_published)) @shared_task @@ -233,7 +233,7 @@ def fix_subseries_docevents_task(): ) @shared_task -def rsync_rfcs_from_rfceditor(rfc_numbers: list[int]): +def rsync_rfcs_from_rfceditor_task(rfc_numbers: list[int]): log.log(f"Rsyncing rfcs from rfc-editor: {rfc_numbers}") from_file = None with NamedTemporaryFile(mode="w", delete_on_close=False) as fp: diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index d759025973..bcc87a43aa 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -889,7 +889,7 @@ class TaskTests(TestCase): @mock.patch("ietf.sync.tasks.rfceditor.update_docs_from_rfc_index") @mock.patch("ietf.sync.tasks.rfceditor.parse_index") @mock.patch("ietf.sync.tasks.requests.get") - @mock.patch("ietf.sync.tasks.rsync_rfcs_from_rfceditor.delay") + @mock.patch("ietf.sync.tasks.rsync_rfcs_from_rfceditor_task.delay") def test_rfc_editor_index_update_task( self, rsync_task_mock, requests_get_mock, parse_index_mock, update_docs_mock ) -> None: # the annotation here 
prevents mypy from complaining about annotation-unchecked @@ -1170,13 +1170,13 @@ def test_iana_protocols_update_task( @mock.patch("ietf.sync.tasks.rsync_helper") @mock.patch("ietf.sync.tasks.load_rfcs_into_blobdb") @mock.patch("ietf.sync.tasks.rebuild_reference_relations_task.delay") - def test_rsync_rfcs_from_rfceditor( + def test_rsync_rfcs_from_rfceditor_task( self, rebuild_relations_mock, load_blobs_mock, rsync_helper_mock, ): - tasks.rsync_rfcs_from_rfceditor([12345, 54321]) + tasks.rsync_rfcs_from_rfceditor_task([12345, 54321]) self.assertTrue(rsync_helper_mock.called) self.assertTrue(load_blobs_mock.called) load_blobs_args, load_blobs_kwargs = load_blobs_mock.call_args