diff --git a/ietf/sync/tests_utils.py b/ietf/sync/tests_utils.py index eb4b4ddf74..bb4a859e30 100644 --- a/ietf/sync/tests_utils.py +++ b/ietf/sync/tests_utils.py @@ -5,6 +5,7 @@ from django.test import override_settings from ietf import settings +from ietf.doc.factories import RfcFactory from ietf.doc.storage_utils import exists_in_storage, retrieve_str from ietf.sync.utils import build_from_file_content, load_rfcs_into_blobdb, rsync_helper from ietf.utils.test_utils import TestCase @@ -59,6 +60,7 @@ def test_load_rfcs_into_blobdb(self): rfc_path = Path(faux_rfc_path) (rfc_path / "prerelease").mkdir() for num in [12345, 54321]: + RfcFactory(rfc_number=num) for ext in settings.RFC_FILE_TYPES + ("json",): with (rfc_path / f"rfc{num}.{ext}").open("w") as f: f.write(ext) diff --git a/ietf/sync/utils.py b/ietf/sync/utils.py index 5b5f8ff559..b3bdd8d206 100644 --- a/ietf/sync/utils.py +++ b/ietf/sync/utils.py @@ -6,8 +6,8 @@ from pathlib import Path from django.conf import settings - from ietf.utils import log +from ietf.doc.models import Document from ietf.doc.storage_utils import AlreadyExistsError, store_bytes @@ -26,17 +26,42 @@ def build_from_file_content(rfc_numbers: list[int]) -> str: def load_rfcs_into_blobdb(numbers: list[int]): types_to_load = settings.RFC_FILE_TYPES + ("json",) + rfc_docs = Document.objects.filter(type="rfc", rfc_number__in=numbers).values_list("rfc_number", flat=True) for num in numbers: - for ext in types_to_load: - fs_path = Path(settings.RFC_PATH) / f"rfc{num}.{ext}" - if fs_path.is_file(): - with fs_path.open("rb") as f: + if num in rfc_docs: + for ext in types_to_load: + fs_path = Path(settings.RFC_PATH) / f"rfc{num}.{ext}" + if fs_path.is_file(): + with fs_path.open("rb") as f: + bytes = f.read() + mtime = fs_path.stat().st_mtime + try: + store_bytes( + kind="rfc", + name=f"{ext}/rfc{num}.{ext}", + content=bytes, + allow_overwrite=False, # Intentionally not allowing overwrite. + doc_name=f"rfc{num}", + doc_rev=None, + # Not setting content_type + mtime=datetime.datetime.fromtimestamp( + mtime, tz=datetime.UTC + ), + ) + except AlreadyExistsError as e: + log.log(str(e)) + + # store the not-prepped xml + name = f"rfc{num}.notprepped.xml" + source = Path(settings.RFC_PATH) / "prerelease" / name + if source.is_file(): + with open(source, "rb") as f: bytes = f.read() - mtime = fs_path.stat().st_mtime + mtime = source.stat().st_mtime try: store_bytes( kind="rfc", - name=f"{ext}/rfc{num}.{ext}", + name=f"notprepped/{name}", content=bytes, allow_overwrite=False, # Intentionally not allowing overwrite. doc_name=f"rfc{num}", @@ -46,24 +71,7 @@ def load_rfcs_into_blobdb(numbers: list[int]): ) except AlreadyExistsError as e: log.log(str(e)) - - # store the not-prepped xml - name = f"rfc{num}.notprepped.xml" - source = Path(settings.RFC_PATH) / "prerelease" / name - if source.is_file(): - with open(source, "rb") as f: - bytes = f.read() - mtime = source.stat().st_mtime - try: - store_bytes( - kind="rfc", - name=f"notprepped/{name}", - content=bytes, - allow_overwrite=False, # Intentionally not allowing overwrite. - doc_name=f"rfc{num}", - doc_rev=None, - # Not setting content_type - mtime=datetime.datetime.fromtimestamp(mtime, tz=datetime.UTC), - ) - except AlreadyExistsError as e: - log.log(str(e)) + else: + log.log( + f"Skipping loading rfc{num} into blobdb as no matching Document exists" + )