From 6ad70c2bdfeb4d444052da348e4226b8420c06ec Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 9 Jun 2026 15:19:53 -0300 Subject: [PATCH 1/5] refactor: RFCINDEX_INPUT_PATH dflt -> prod value --- ietf/sync/rfcindex.py | 13 +++++++------ k8s/settings_local.py | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ietf/sync/rfcindex.py b/ietf/sync/rfcindex.py index be55a6866e..2f34673699 100644 --- a/ietf/sync/rfcindex.py +++ b/ietf/sync/rfcindex.py @@ -86,9 +86,12 @@ class UnusableRfcNumber: comment: str +def red_bucket_input_path(filename: str) -> str: + return str(Path(getattr(settings, "RFCINDEX_INPUT_PATH", "other/")) / filename) + + def get_unusable_rfc_numbers() -> list[UnusableRfcNumber]: - FILENAME = "unusable-rfc-numbers.json" - bucket_path = str(Path(getattr(settings, "RFCINDEX_INPUT_PATH", "")) / FILENAME) + bucket_path = red_bucket_input_path("unusable-rfc-numbers.json") try: with storages["red_bucket"].open(bucket_path) as urn_file: records = json.load(urn_file) @@ -115,8 +118,7 @@ def get_unusable_rfc_numbers() -> list[UnusableRfcNumber]: def get_april1_rfc_numbers() -> Container[int]: - FILENAME = "april-first-rfc-numbers.json" - bucket_path = str(Path(getattr(settings, "RFCINDEX_INPUT_PATH", "")) / FILENAME) + bucket_path = red_bucket_input_path("april-first-rfc-numbers.json") try: with storages["red_bucket"].open(bucket_path) as urn_file: records = json.load(urn_file) @@ -139,8 +141,7 @@ def get_april1_rfc_numbers() -> Container[int]: def get_publication_std_levels() -> dict[int, StdLevelName]: - FILENAME = "publication-std-levels.json" - bucket_path = str(Path(getattr(settings, "RFCINDEX_INPUT_PATH", "")) / FILENAME) + bucket_path = red_bucket_input_path("publication-std-levels.json") values: dict[int, StdLevelName] = {} try: with storages["red_bucket"].open(bucket_path) as urn_file: diff --git a/k8s/settings_local.py b/k8s/settings_local.py index 20c5252ff0..0408be377c 100644 --- a/k8s/settings_local.py +++ 
b/k8s/settings_local.py @@ -473,7 +473,7 @@ def _multiline_to_list(s): } RFCINDEX_DELETE_THEN_WRITE = False # S3Storage allows file_overwrite by default RFCINDEX_OUTPUT_PATH = os.environ.get("DATATRACKER_RFCINDEX_OUTPUT_PATH", "other/") -RFCINDEX_INPUT_PATH = os.environ.get("DATATRACKER_RFCINDEX_INPUT_PATH", "") +RFCINDEX_INPUT_PATH = os.environ.get("DATATRACKER_RFCINDEX_INPUT_PATH", "other/") # Configure the blobdb app for artifact storage _blobdb_replication_enabled = ( From 449acf085a6e0399d32ffb1fc0d4e3eb01d35037 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 9 Jun 2026 15:23:09 -0300 Subject: [PATCH 2/5] refactor: RFCINDEX_OUTPUT_PATH dflt -> prod value --- ietf/sync/rfcindex.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/ietf/sync/rfcindex.py b/ietf/sync/rfcindex.py index 2f34673699..f2cb7fd2bd 100644 --- a/ietf/sync/rfcindex.py +++ b/ietf/sync/rfcindex.py @@ -48,9 +48,17 @@ def errata_url(rfc: Document): return urljoin(settings.RFC_EDITOR_ERRATA_BASE_URL + "/", f"rfc{rfc.rfc_number}") +def red_bucket_input_path(filename: str) -> str: + return str(Path(getattr(settings, "RFCINDEX_INPUT_PATH", "other/")) / filename) + + +def red_bucket_output_path(filename: str) -> str: + return str(Path(getattr(settings, "RFCINDEX_OUTPUT_PATH", "other/")) / filename) + + def save_to_red_bucket(filename: str, content: str | bytes): red_bucket = storages["red_bucket"] - bucket_path = str(Path(getattr(settings, "RFCINDEX_OUTPUT_PATH", "")) / filename) + bucket_path = red_bucket_output_path(filename) if getattr(settings, "RFCINDEX_DELETE_THEN_WRITE", True): # Django 4.2's FileSystemStorage does not support allow_overwrite. 
red_bucket.delete(bucket_path) @@ -86,10 +94,6 @@ class UnusableRfcNumber: comment: str -def red_bucket_input_path(filename: str) -> str: - return str(Path(getattr(settings, "RFCINDEX_INPUT_PATH", "other/")) / filename) - - def get_unusable_rfc_numbers() -> list[UnusableRfcNumber]: bucket_path = red_bucket_input_path("unusable-rfc-numbers.json") try: From 08e60c0fe140a2f873299b833a1984965356a415 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 9 Jun 2026 15:23:32 -0300 Subject: [PATCH 3/5] ci: only change RFCINDEX_* paths if set --- k8s/settings_local.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/k8s/settings_local.py b/k8s/settings_local.py index 0408be377c..5dc31bac0e 100644 --- a/k8s/settings_local.py +++ b/k8s/settings_local.py @@ -472,8 +472,10 @@ def _multiline_to_list(s): ), } RFCINDEX_DELETE_THEN_WRITE = False # S3Storage allows file_overwrite by default -RFCINDEX_OUTPUT_PATH = os.environ.get("DATATRACKER_RFCINDEX_OUTPUT_PATH", "other/") -RFCINDEX_INPUT_PATH = os.environ.get("DATATRACKER_RFCINDEX_INPUT_PATH", "other/") +if "DATATRACKER_RFCINDEX_OUTPUT_PATH" in os.environ: + RFCINDEX_OUTPUT_PATH = os.environ.get("DATATRACKER_RFCINDEX_OUTPUT_PATH") +if "DATATRACKER_RFCINDEX_INPUT_PATH" in os.environ: + RFCINDEX_INPUT_PATH = os.environ.get("DATATRACKER_RFCINDEX_INPUT_PATH") # Configure the blobdb app for artifact storage _blobdb_replication_enabled = ( From 7c7feaca5a1e18c83be09271447a412e80409b45 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 9 Jun 2026 20:36:16 -0300 Subject: [PATCH 4/5] refactor: clearer settings --- ietf/settings.py | 4 ++++ ietf/sync/rfcindex.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ietf/settings.py b/ietf/settings.py index 95f2ffefd7..27d2d9cd5c 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -976,6 +976,10 @@ def skip_unreadable_post(record): ) RFC_FILE_TYPES = IDSUBMIT_FILE_TYPES +# Paths in the red bucket +RFCINDEX_INPUT_PATH = "other/" 
+RFCINDEX_OUTPUT_PATH = "other/" + IDSUBMIT_MAX_DRAFT_SIZE = { 'txt': 2*1024*1024, # Max size of txt draft file in bytes 'xml': 3*1024*1024, # Max size of xml draft file in bytes diff --git a/ietf/sync/rfcindex.py b/ietf/sync/rfcindex.py index f2cb7fd2bd..f47974f900 100644 --- a/ietf/sync/rfcindex.py +++ b/ietf/sync/rfcindex.py @@ -49,11 +49,11 @@ def errata_url(rfc: Document): def red_bucket_input_path(filename: str) -> str: - return str(Path(getattr(settings, "RFCINDEX_INPUT_PATH", "other/")) / filename) + return str(Path(settings.RFCINDEX_INPUT_PATH) / filename) def red_bucket_output_path(filename: str) -> str: - return str(Path(getattr(settings, "RFCINDEX_OUTPUT_PATH", "other/")) / filename) + return str(Path(settings.RFCINDEX_OUTPUT_PATH) / filename) def save_to_red_bucket(filename: str, content: str | bytes): From 908ee9b65c32139c3c9ce777153eb076928a0d8a Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 9 Jun 2026 20:36:29 -0300 Subject: [PATCH 5/5] test: update / fix / add tests --- ietf/sync/tests_rfcindex.py | 50 +++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/ietf/sync/tests_rfcindex.py b/ietf/sync/tests_rfcindex.py index 2b70924db3..a18675407e 100644 --- a/ietf/sync/tests_rfcindex.py +++ b/ietf/sync/tests_rfcindex.py @@ -28,9 +28,11 @@ get_april1_rfc_numbers, get_publication_std_levels, get_unusable_rfc_numbers, + red_bucket_input_path, + red_bucket_output_path, + save_to_filesystem, save_to_red_bucket, subseries_text_line, - save_to_filesystem, ) from ietf.utils.test_utils import TestCase @@ -398,25 +400,47 @@ def test_create_fyi_txt_index(self, mock_save_blob, mock_save_file): ) +@override_settings(RFCINDEX_INPUT_PATH="input/", RFCINDEX_OUTPUT_PATH="output/") class HelperTests(TestCase): + INPUT_PATH = "input" + OUTPUT_PATH = "output" + def test_format_rfc_number(self): self.assertEqual(format_rfc_number(10), "10") with override_settings(RFCINDEX_MATCH_LEGACY_XML=True): 
self.assertEqual(format_rfc_number(10), "0010") + def test_red_bucket_input_path(self): + with override_settings(RFCINDEX_INPUT_PATH="bar"): + self.assertEqual(red_bucket_input_path("foo"), "bar/foo") + with override_settings(RFCINDEX_INPUT_PATH="bar/"): + self.assertEqual(red_bucket_input_path("foo"), "bar/foo") + + def test_red_bucket_output_path(self): + self.assertEqual(red_bucket_output_path("foo"), f"{self.OUTPUT_PATH}/foo") + with override_settings(RFCINDEX_OUTPUT_PATH="bar"): + self.assertEqual(red_bucket_output_path("foo"), "bar/foo") + with override_settings(RFCINDEX_OUTPUT_PATH="bar/"): + self.assertEqual(red_bucket_output_path("foo"), "bar/foo") + def test_save_to_red_bucket(self): red_bucket = storages["red_bucket"] with override_settings(RFCINDEX_DELETE_THEN_WRITE=False): save_to_red_bucket("test", "contents \U0001f600") # Read as binary and explicitly decode to confirm encoding - with red_bucket.open("test", "rb") as f: + with red_bucket.open(f"{self.OUTPUT_PATH}/test", "rb") as f: self.assertEqual(f.read().decode("utf-8"), "contents \U0001f600") with override_settings(RFCINDEX_DELETE_THEN_WRITE=True): save_to_red_bucket("test", "new contents \U0001fae0".encode("utf-8")) # Read as binary and explicitly decode to confirm encoding - with red_bucket.open("test", "rb") as f: + with red_bucket.open(f"{self.OUTPUT_PATH}/test", "rb") as f: self.assertEqual(f.read().decode("utf-8"), "new contents \U0001fae0") - red_bucket.delete("test") # clean up like a good child + red_bucket.delete(f"{self.OUTPUT_PATH}/test") # clean up like a good child + # check that we can override the path + with override_settings(RFCINDEX_OUTPUT_PATH="fruit"): + save_to_red_bucket("test", "content") + self.assertTrue(red_bucket.exists("fruit/test")) + red_bucket.delete("fruit/test") # clean up like a good child def test_save_to_filesystem(self): rfc_path = Path(settings.RFC_PATH) @@ -442,30 +466,36 @@ def test_get_unusable_rfc_numbers_raises(self): with 
self.assertRaises(FileNotFoundError): get_unusable_rfc_numbers() red_bucket = storages["red_bucket"] - red_bucket.save("unusable-rfc-numbers.json", ContentFile("not json")) + red_bucket.save( + f"{self.INPUT_PATH}/unusable-rfc-numbers.json", ContentFile("not json") + ) with self.assertRaises(json.JSONDecodeError): get_unusable_rfc_numbers() - red_bucket.delete("unusable-rfc-numbers.json") + red_bucket.delete(f"{self.INPUT_PATH}/unusable-rfc-numbers.json") def test_get_april1_rfc_numbers_raises(self): """get_april1_rfc_numbers should bail on errors""" with self.assertRaises(FileNotFoundError): get_april1_rfc_numbers() red_bucket = storages["red_bucket"] - red_bucket.save("april-first-rfc-numbers.json", ContentFile("not json")) + red_bucket.save( + f"{self.INPUT_PATH}/april-first-rfc-numbers.json", ContentFile("not json") + ) with self.assertRaises(json.JSONDecodeError): get_april1_rfc_numbers() - red_bucket.delete("april-first-rfc-numbers.json") + red_bucket.delete(f"{self.INPUT_PATH}/april-first-rfc-numbers.json") def test_get_publication_std_levels_raises(self): """get_publication_std_levels should bail on errors""" with self.assertRaises(FileNotFoundError): get_publication_std_levels() red_bucket = storages["red_bucket"] - red_bucket.save("publication-std-levels.json", ContentFile("not json")) + red_bucket.save( + f"{self.INPUT_PATH}/publication-std-levels.json", ContentFile("not json") + ) with self.assertRaises(json.JSONDecodeError): get_publication_std_levels() - red_bucket.delete("publication-std-levels.json") + red_bucket.delete(f"{self.INPUT_PATH}/publication-std-levels.json") def test_subseries_text_line(self): text = "foobar"