diff --git a/.github/workflows/build-base-app.yml b/.github/workflows/build-base-app.yml index 1b0855cc47..35172aa299 100644 --- a/.github/workflows/build-base-app.yml +++ b/.github/workflows/build-base-app.yml @@ -28,7 +28,7 @@ jobs: echo "IMGVERSION=$CURDATE" >> $GITHUB_ENV - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 @@ -60,7 +60,7 @@ jobs: echo "${{ env.IMGVERSION }}" > dev/build/TARGET_BASE - name: Commit CHANGELOG.md - uses: stefanzweifel/git-auto-commit-action@v6 + uses: stefanzweifel/git-auto-commit-action@v7 with: branch: ${{ github.ref_name }} commit_message: 'ci: update base image target version to ${{ env.IMGVERSION }}' diff --git a/.github/workflows/build-mq-broker.yml b/.github/workflows/build-mq-broker.yml index ef7ed2f65c..b297e34b47 100644 --- a/.github/workflows/build-mq-broker.yml +++ b/.github/workflows/build-mq-broker.yml @@ -27,7 +27,7 @@ jobs: - uses: actions/checkout@v6 - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v4 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8ec806b229..49a0e5b53b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -186,7 +186,7 @@ jobs: - name: Download a Coverage Results if: ${{ github.event.inputs.skiptests == 'false' || github.ref_name == 'release' }} - uses: actions/download-artifact@v6.0.0 + uses: actions/download-artifact@v8.0.1 with: name: coverage @@ -291,7 +291,7 @@ jobs: - name: Download Coverage Results if: ${{ github.event.inputs.skiptests == 'false' || github.ref_name == 'release' }} - uses: actions/download-artifact@v6.0.0 + uses: actions/download-artifact@v8.0.1 with: name: coverage diff --git a/.github/workflows/tests-az.yml b/.github/workflows/tests-az.yml index 8553563a19..833ca89bef 100644 --- a/.github/workflows/tests-az.yml +++ b/.github/workflows/tests-az.yml @@ -38,7 +38,7 @@ jobs: ssh-keyscan -t rsa $vminfo >> ~/.ssh/known_hosts - name: Remote SSH into VM - uses: appleboy/ssh-action@2ead5e36573f08b82fbfce1504f1a4b05a647c6f + uses: appleboy/ssh-action@0ff4204d59e8e51228ff73bce53f80d53301dee2 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index be7b834b7a..ad2e35408d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -75,7 +75,7 @@ jobs: path: geckodriver.log - name: Upload Coverage Results to Codecov - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@v6 with: disable_search: true files: coverage.xml @@ -130,6 +130,7 @@ jobs: if-no-files-found: ignore tests-playwright-legacy: + if: ${{ false }} # disable until we sort out suspected test runner issue name: Playwright Legacy Tests runs-on: ubuntu-latest container: ghcr.io/ietf-tools/datatracker-app-base:${{ inputs.targetBaseVersion }} diff --git a/.gitignore b/.gitignore index 84bc800e3b..ccc7a46b08 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .DS_store datatracker.sublime-project datatracker.sublime-workspace +/.claude /.coverage /.factoryboy_random_state /.mypy_cache diff --git a/dev/build/Dockerfile b/dev/build/Dockerfile index af43e990e0..e57fecd5f2 100644 --- a/dev/build/Dockerfile +++ b/dev/build/Dockerfile @@ -1,4 +1,4 @@ -FROM ghcr.io/ietf-tools/datatracker-app-base:20260323T1533 +FROM ghcr.io/ietf-tools/datatracker-app-base:20260410T1557 LABEL maintainer="IETF Tools 
Team " ENV DEBIAN_FRONTEND=noninteractive diff --git a/dev/build/TARGET_BASE b/dev/build/TARGET_BASE index 09f74cce28..f430037c09 100644 --- a/dev/build/TARGET_BASE +++ b/dev/build/TARGET_BASE @@ -1 +1 @@ -20260323T1533 +20260410T1557 diff --git a/dev/build/datatracker-start.sh b/dev/build/datatracker-start.sh index a676415a26..012a563412 100644 --- a/dev/build/datatracker-start.sh +++ b/dev/build/datatracker-start.sh @@ -45,16 +45,6 @@ cleanup () { trap 'trap "" TERM; cleanup' TERM # start gunicorn in the background so we can trap the TERM signal -gunicorn \ - -c /workspace/gunicorn.conf.py \ - --workers "${DATATRACKER_GUNICORN_WORKERS:-9}" \ - --max-requests "${DATATRACKER_GUNICORN_MAX_REQUESTS:-32768}" \ - --timeout "${DATATRACKER_GUNICORN_TIMEOUT:-180}" \ - --bind :8000 \ - --log-level "${DATATRACKER_GUNICORN_LOG_LEVEL:-info}" \ - --capture-output \ - --access-logfile -\ - ${DATATRACKER_GUNICORN_EXTRA_ARGS} \ - ietf.wsgi:application & +gunicorn -c /workspace/gunicorn.conf.py ${DATATRACKER_GUNICORN_EXTRA_ARGS} ietf.wsgi:application & gunicorn_pid=$! wait "${gunicorn_pid}" diff --git a/dev/build/gunicorn.conf.py b/dev/build/gunicorn.conf.py index 9af4478685..be8808ec48 100644 --- a/dev/build/gunicorn.conf.py +++ b/dev/build/gunicorn.conf.py @@ -1,4 +1,4 @@ -# Copyright The IETF Trust 2024-2025, All Rights Reserved +# Copyright The IETF Trust 2024-2026, All Rights Reserved import os import ietf @@ -12,6 +12,23 @@ from opentelemetry.instrumentation.pymemcache import PymemcacheInstrumentor from opentelemetry.instrumentation.requests import RequestsInstrumentor +# Bind all ipv4 interfaces and ipv6 loopback interface. Would prefer to bind all +# ipv6 as well, but something conflicts with [::]:8000. +bind = ["0.0.0.0:8000", "[::1]:8000"] + +# Disable control socket +control_socket_disable = True + +# Settings configurable via environment +workers = int(os.environ.get("DATATRACKER_GUNICORN_WORKERS", "9")) +max_requests = int(os.environ.get("DATATRACKER_GUNICORN_MAX_REQUESTS", "32768")) +timeout = int(os.environ.get("DATATRACKER_GUNICORN_TIMEOUT", "180")) +loglevel = os.environ.get("DATATRACKER_GUNICORN_LOG_LEVEL", "info") + +# Logging / stdout capture +capture_output = True +accesslog = "-" + # Configure security scheme headers for forwarded requests. Cloudflare sets X-Forwarded-Proto # for us. Don't trust any of the other similar headers. Only trust the header if it's coming # from localhost, as all legitimate traffic will reach gunicorn via co-located nginx. 
diff --git a/dev/deploy-to-container/package-lock.json b/dev/deploy-to-container/package-lock.json index a68f170c4b..5d5bef5604 100644 --- a/dev/deploy-to-container/package-lock.json +++ b/dev/deploy-to-container/package-lock.json @@ -10,8 +10,8 @@ "fs-extra": "^11.3.4", "nanoid": "5.1.7", "nanoid-dictionary": "5.0.0", - "slugify": "1.6.8", - "tar": "^7.5.12", + "slugify": "1.6.9", + "tar": "^7.5.13", "yargs": "^17.7.2" }, "engines": { @@ -577,9 +577,9 @@ "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" }, "node_modules/slugify": { - "version": "1.6.8", - "resolved": "https://registry.npmjs.org/slugify/-/slugify-1.6.8.tgz", - "integrity": "sha512-HVk9X1E0gz3mSpoi60h/saazLKXKaZThMLU3u/aNwoYn8/xQyX2MGxL0ui2eaokkD7tF+Zo+cKTHUbe1mmmGzA==", + "version": "1.6.9", + "resolved": "https://registry.npmjs.org/slugify/-/slugify-1.6.9.tgz", + "integrity": "sha512-vZ7rfeehZui7wQs438JXBckYLkIIdfHOXsaVEUMyS5fHo1483l1bMdo0EDSWYclY0yZKFOipDy4KHuKs6ssvdg==", "engines": { "node": ">=8.0.0" } @@ -639,9 +639,9 @@ } }, "node_modules/tar": { - "version": "7.5.12", - "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.12.tgz", - "integrity": "sha512-9TsuLcdhOn4XztcQqhNyq1KOwOOED/3k58JAvtULiYqbO8B/0IBAAIE1hj0Svmm58k27TmcigyDI0deMlgG3uw==", + "version": "7.5.13", + "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.13.tgz", + "integrity": "sha512-tOG/7GyXpFevhXVh8jOPJrmtRpOTsYqUIkVdVooZYJS/z8WhfQUX8RJILmeuJNinGAMSu1veBr4asSHFt5/hng==", "dependencies": { "@isaacs/fs-minipass": "^4.0.0", "chownr": "^3.0.0", @@ -1194,9 +1194,9 @@ "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" }, "slugify": { - "version": "1.6.8", - "resolved": "https://registry.npmjs.org/slugify/-/slugify-1.6.8.tgz", - "integrity": "sha512-HVk9X1E0gz3mSpoi60h/saazLKXKaZThMLU3u/aNwoYn8/xQyX2MGxL0ui2eaokkD7tF+Zo+cKTHUbe1mmmGzA==" + "version": "1.6.9", + "resolved": "https://registry.npmjs.org/slugify/-/slugify-1.6.9.tgz", + "integrity": "sha512-vZ7rfeehZui7wQs438JXBckYLkIIdfHOXsaVEUMyS5fHo1483l1bMdo0EDSWYclY0yZKFOipDy4KHuKs6ssvdg==" }, "split-ca": { "version": "1.0.1", @@ -1241,9 +1241,9 @@ } }, "tar": { - "version": "7.5.12", - "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.12.tgz", - "integrity": "sha512-9TsuLcdhOn4XztcQqhNyq1KOwOOED/3k58JAvtULiYqbO8B/0IBAAIE1hj0Svmm58k27TmcigyDI0deMlgG3uw==", + "version": "7.5.13", + "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.13.tgz", + "integrity": "sha512-tOG/7GyXpFevhXVh8jOPJrmtRpOTsYqUIkVdVooZYJS/z8WhfQUX8RJILmeuJNinGAMSu1veBr4asSHFt5/hng==", "requires": { "@isaacs/fs-minipass": "^4.0.0", "chownr": "^3.0.0", diff --git a/dev/deploy-to-container/package.json b/dev/deploy-to-container/package.json index aa9e82dbdf..ccc78fc63b 100644 --- a/dev/deploy-to-container/package.json +++ b/dev/deploy-to-container/package.json @@ -6,8 +6,8 @@ "fs-extra": "^11.3.4", "nanoid": "5.1.7", "nanoid-dictionary": "5.0.0", - "slugify": "1.6.8", - "tar": "^7.5.12", + "slugify": "1.6.9", + "tar": "^7.5.13", "yargs": "^17.7.2" }, "engines": { diff --git a/docker-compose.yml b/docker-compose.yml index 4c3f2f6b8e..073d04b896 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -132,6 +132,18 @@ services: volumes: - blobdb-data:/var/lib/postgresql/data +# typesense: +# image: typesense/typesense:30.1 +# restart: on-failure +# ports: +# - "8108:8108" +# volumes: +# - ./typesense-data:/data +# command: +# - '--data-dir=/data' +# - '--api-key=typesense-api-key' +# - '--enable-cors' + # 
Celery Beat is a periodic task runner. It is not normally needed for development, # but can be enabled by uncommenting the following. # diff --git a/ietf/api/serializers_rpc.py b/ietf/api/serializers_rpc.py index 397ca05d9b..d888de4586 100644 --- a/ietf/api/serializers_rpc.py +++ b/ietf/api/serializers_rpc.py @@ -27,7 +27,7 @@ update_action_holders, update_rfcauthors, ) -from ietf.group.models import Group +from ietf.group.models import Group, Role from ietf.group.serializers import AreaSerializer from ietf.name.models import StreamName, StdLevelName from ietf.person.models import Person @@ -97,6 +97,21 @@ class Meta: fields = ["draft_name", "authors"] +class WgChairSerializer(serializers.Serializer): + """Serialize a WG chair's name and email from a Role""" + + name = serializers.SerializerMethodField() + email = serializers.SerializerMethodField() + + @extend_schema_field(serializers.CharField) + def get_name(self, role: Role) -> str: + return role.person.plain_name() + + @extend_schema_field(serializers.EmailField) + def get_email(self, role: Role) -> str: + return role.email.email_address() + + class DocumentAuthorSerializer(serializers.ModelSerializer): """Serializer for a Person in a response""" @@ -126,6 +141,7 @@ class FullDraftSerializer(serializers.ModelSerializer): source="shepherd.person", read_only=True ) consensus = serializers.SerializerMethodField() + wg_chairs = serializers.SerializerMethodField() class Meta: model = Document @@ -145,11 +161,21 @@ class Meta: "consensus", "shepherd", "ad", + "wg_chairs", ] def get_consensus(self, doc: Document) -> Optional[bool]: return default_consensus(doc) + @extend_schema_field(WgChairSerializer(many=True)) + def get_wg_chairs(self, doc: Document): + if doc.group is None: + return [] + chairs = doc.group.role_set.filter(name_id="chair").select_related( + "person", "email" + ) + return WgChairSerializer(chairs, many=True).data + def get_source_format( self, doc: Document ) -> Literal["unknown", "xml-v2", "xml-v3", "txt"]: diff --git a/ietf/api/tests_views_rpc.py b/ietf/api/tests_views_rpc.py index 0db67e126f..180221cffc 100644 --- a/ietf/api/tests_views_rpc.py +++ b/ietf/api/tests_views_rpc.py @@ -1,4 +1,5 @@ # Copyright The IETF Trust 2025, All Rights Reserved +import datetime from io import StringIO from pathlib import Path from tempfile import TemporaryDirectory @@ -10,12 +11,15 @@ from django.test.utils import override_settings from django.urls import reverse as urlreverse import mock +from django.utils import timezone from ietf.blobdb.models import Blob from ietf.doc.factories import IndividualDraftFactory, RfcFactory, WgDraftFactory, WgRfcFactory from ietf.doc.models import RelatedDocument, Document from ietf.group.factories import RoleFactory, GroupFactory from ietf.person.factories import PersonFactory +from ietf.sync.rfcindex import rfcindex_is_dirty +from ietf.utils.models import DirtyBits from ietf.utils.test_utils import APITestCase, reload_db_objects @@ -408,8 +412,13 @@ def _valid_post_data(): ) @override_settings(APP_API_TOKENS={"ietf.api.views_rpc": ["valid-token"]}) - @mock.patch("ietf.api.views_rpc.create_rfc_index_task") - def test_refresh_rfc_index(self, mock_task): + def test_refresh_rfc_index(self): + DirtyBits.objects.create( + slug=DirtyBits.Slugs.RFCINDEX, + dirty_time=timezone.now() - datetime.timedelta(days=1), + processed_time=timezone.now() - datetime.timedelta(hours=12), + ) + self.assertFalse(rfcindex_is_dirty()) url = urlreverse("ietf.api.purple_api.refresh_rfc_index") response = self.client.get(url) 
self.assertEqual(response.status_code, 403) @@ -417,7 +426,7 @@ def test_refresh_rfc_index(self, mock_task): self.assertEqual(response.status_code, 403) response = self.client.get(url, headers={"X-Api-Key": "valid-token"}) self.assertEqual(response.status_code, 405) - self.assertFalse(mock_task.delay.called) + self.assertFalse(rfcindex_is_dirty()) response = self.client.post(url, headers={"X-Api-Key": "valid-token"}) self.assertEqual(response.status_code, 202) - self.assertTrue(mock_task.delay.called) + self.assertTrue(rfcindex_is_dirty()) diff --git a/ietf/api/views_rpc.py b/ietf/api/views_rpc.py index 1e96118e58..6bc45fe3da 100644 --- a/ietf/api/views_rpc.py +++ b/ietf/api/views_rpc.py @@ -32,7 +32,9 @@ EmailPersonSerializer, RfcWithAuthorsSerializer, DraftWithAuthorsSerializer, - NotificationAckSerializer, RfcPubSerializer, RfcFileSerializer, + NotificationAckSerializer, + RfcPubSerializer, + RfcFileSerializer, EditableRfcSerializer, ) from ietf.doc.models import Document, DocHistory, RfcAuthor, DocEvent @@ -45,7 +47,7 @@ update_rfc_searchindex_task, ) from ietf.person.models import Email, Person -from ietf.sync.tasks import create_rfc_index_task +from ietf.sync.rfcindex import mark_rfcindex_as_dirty class Conflict(APIException): @@ -344,9 +346,10 @@ def post(self, request): class RfcAuthorViewSet(viewsets.ReadOnlyModelViewSet): """ViewSet for RfcAuthor model - + Router needs to provide rfc_number as a kwarg """ + api_key_endpoint = "ietf.api.views_rpc" queryset = RfcAuthor.objects.all() @@ -407,7 +410,7 @@ class RfcPubFilesView(APIView): def _fs_destination(self, filename: str | Path) -> Path: """Destination for an uploaded RFC file in the filesystem - + Strips any path components in filename and returns an absolute Path. """ rfc_path = Path(settings.RFC_PATH) @@ -419,7 +422,7 @@ def _fs_destination(self, filename: str | Path) -> Path: def _blob_destination(self, filename: str | Path) -> str: """Destination name for an uploaded RFC file in the blob store - + Strips any path components in filename and returns an absolute Path. 
""" filename = Path(filename) # could potentially have directory components @@ -472,9 +475,7 @@ def post(self, request): code="files-exist", ) for possible_existing_blob in possible_rfc_blobs: - if exists_in_storage( - kind=blob_kind, name=possible_existing_blob - ): + if exists_in_storage(kind=blob_kind, name=possible_existing_blob): raise Conflict( "Blob(s) already exist for this RFC", code="blobs-exist", @@ -523,7 +524,9 @@ def post(self, request): # Trigger red precomputer needs_updating = [rfc.rfc_number] - for rel in rfc.relateddocument_set.filter(relationship_id__in=["obs","updates"]): + for rel in rfc.relateddocument_set.filter( + relationship_id__in=["obs", "updates"] + ): needs_updating.append(rel.target.rfc_number) trigger_red_precomputer_task.delay(rfc_number_list=sorted(needs_updating)) # Trigger search index update @@ -540,10 +543,10 @@ class RfcIndexView(APIView): @extend_schema( operation_id="refresh_rfc_index", summary="Refresh rfc-index files", - description="Requests creation of rfc-index.xml and rfc-index.txt files", + description="Requests creation of various index files.", responses={202: None}, request=None, ) def post(self, request): - create_rfc_index_task.delay() + mark_rfcindex_as_dirty() return Response(status=202) diff --git a/ietf/blobdb/admin.py b/ietf/blobdb/admin.py index 3e1a2a311f..44a30d1d7f 100644 --- a/ietf/blobdb/admin.py +++ b/ietf/blobdb/admin.py @@ -1,9 +1,12 @@ -# Copyright The IETF Trust 2025, All Rights Reserved +# Copyright The IETF Trust 2025-2026, All Rights Reserved from django.contrib import admin +from django.db.models import QuerySet from django.db.models.functions import Length from rangefilter.filters import DateRangeQuickSelectListFilterBuilder +from .apps import get_blobdb from .models import Blob, ResolvedMaterial +from .utils import queue_for_replication @admin.register(Blob) @@ -17,6 +20,7 @@ class BlobAdmin(admin.ModelAdmin): ] search_fields = ["name"] list_display_links = ["name"] + actions = ["replicate_blob"] def get_queryset(self, request): return ( @@ -30,6 +34,20 @@ def object_size(self, instance): """Get the size of the object""" return instance.object_size # annotation added in get_queryset() + @admin.action(description="Replicate blobs") + def replicate_blob(self, request, queryset: QuerySet[Blob]): + blob_count = 0 + for blob in queryset.all(): + if isinstance(blob, Blob): + queue_for_replication( + bucket=blob.bucket, name=blob.name, using=get_blobdb() + ) + blob_count += 1 + self.message_user( + request, + f"Queued replication of a total of {blob_count} Blob(s)", + ) + @admin.register(ResolvedMaterial) class ResolvedMaterialAdmin(admin.ModelAdmin): diff --git a/ietf/blobdb/models.py b/ietf/blobdb/models.py index 27325ada5d..6dbb615fa0 100644 --- a/ietf/blobdb/models.py +++ b/ietf/blobdb/models.py @@ -1,14 +1,11 @@ -# Copyright The IETF Trust 2025, All Rights Reserved -import json -from functools import partial +# Copyright The IETF Trust 2025-2026, All Rights Reserved from hashlib import sha384 from django.db import models, transaction from django.utils import timezone from .apps import get_blobdb -from .replication import replication_enabled -from .tasks import pybob_the_blob_replicator_task +from .utils import queue_for_replication class BlobQuerySet(models.QuerySet): @@ -81,24 +78,8 @@ def delete(self, **kwargs): self._emit_blob_change_event(using=db) return retval - def _emit_blob_change_event(self, using=None): - if not replication_enabled(self.bucket): - return - - # For now, fire a celery task we've arranged to 
guarantee in-order processing. - # Later becomes pushing an event onto a queue to a dedicated worker. - transaction.on_commit( - partial( - pybob_the_blob_replicator_task.delay, - json.dumps( - { - "name": self.name, - "bucket": self.bucket, - } - ) - ), - using=using, - ) + def _emit_blob_change_event(self, using: str | None=None): + queue_for_replication(self.bucket, self.name, using=using) class ResolvedMaterial(models.Model): diff --git a/ietf/blobdb/storage.py b/ietf/blobdb/storage.py index 4213ec801d..e304dabc5d 100644 --- a/ietf/blobdb/storage.py +++ b/ietf/blobdb/storage.py @@ -1,4 +1,4 @@ -# Copyright The IETF Trust 2025, All Rights Reserved +# Copyright The IETF Trust 2025-2026, All Rights Reserved from typing import Optional from django.core.exceptions import SuspiciousFileOperation @@ -10,6 +10,7 @@ from ietf.utils.storage import MetadataFile from .models import Blob +from .utils import queue_for_replication class BlobFile(MetadataFile): @@ -94,3 +95,12 @@ def get_available_name(self, name, max_length=None): f"asked to store the name '{name[:5]}...{name[-5:]} of length {len(name)}" ) return name # overwrite is permitted + + def force_replication(self, name: str): + """Force replication of a blob by name + + Be careful with this - replication includes replicating deletion of blobs, so + if you call it with a name that does not exist in blobdb, it will be removed + from R2 if it exists there! + """ + queue_for_replication(bucket=self.bucket_name, name=name) diff --git a/ietf/blobdb/utils.py b/ietf/blobdb/utils.py new file mode 100644 index 0000000000..93f8f2f521 --- /dev/null +++ b/ietf/blobdb/utils.py @@ -0,0 +1,32 @@ +# Copyright The IETF Trust 2026, All Rights Reserved +import json +from functools import partial + +from django.db import transaction + +from ietf.blobdb.replication import replication_enabled +from ietf.blobdb.tasks import pybob_the_blob_replicator_task + + +def queue_for_replication(bucket: str, name: str, using: str | None=None): + """Queue a blob for replication + + This is private to the blobdb app. Do not call it directly from other apps. + """ + if not replication_enabled(bucket): + return + + # For now, fire a celery task we've arranged to guarantee in-order processing. + # Later becomes pushing an event onto a queue to a dedicated worker. 
+ transaction.on_commit( + partial( + pybob_the_blob_replicator_task.delay, + json.dumps( + { + "name": name, + "bucket": bucket, + } + ) + ), + using=using, + ) diff --git a/ietf/doc/admin.py b/ietf/doc/admin.py index 0d04e8db3a..757d3da9f9 100644 --- a/ietf/doc/admin.py +++ b/ietf/doc/admin.py @@ -5,6 +5,7 @@ from django.contrib import admin from django.db import models from django import forms +from django.db.models import QuerySet from rangefilter.filters import DateRangeQuickSelectListFilterBuilder from .models import (StateType, State, RelatedDocument, DocumentAuthor, Document, RelatedDocHistory, @@ -18,6 +19,9 @@ from ietf.utils.admin import SaferTabularInline from ietf.utils.validators import validate_external_resource_value +from .storage_utils import force_replication +from .utils import replicate_stored_objects_for_document + class StateTypeAdmin(admin.ModelAdmin): list_display = ["slug", "label"] @@ -73,7 +77,9 @@ class DocumentAuthorAdmin(admin.ModelAdmin): search_fields = ['document__name', 'person__name', 'email__address', 'affiliation', 'country'] raw_id_fields = ["document", "person", "email"] admin.site.register(DocumentAuthor, DocumentAuthorAdmin) - + + + class DocumentAdmin(admin.ModelAdmin): list_display = ['name', 'rev', 'group', 'pages', 'intended_std_level', 'author_list', 'time'] search_fields = ['name'] @@ -81,6 +87,7 @@ class DocumentAdmin(admin.ModelAdmin): raw_id_fields = ['group', 'shepherd', 'ad'] inlines = [DocAuthorInline, DocActionHolderInline, RelatedDocumentInline, AdditionalUrlInLine] form = DocumentForm + actions = ["replicate_stored_objects"] def save_model(self, request, obj, form, change): e = DocEvent.objects.create( @@ -95,6 +102,22 @@ def save_model(self, request, obj, form, change): def state(self, instance): return self.get_state() + @admin.action(description="Replicate related blobs") + def replicate_stored_objects(self, request, queryset: QuerySet[Document]): + doc_count = 0 + stored_obj_count = 0 + for doc in queryset.all(): + doc_count += 1 + if isinstance(doc, Document): + stored_obj_count += replicate_stored_objects_for_document(doc) + self.message_user( + request, + ( + f"Queued replication of a total of {stored_obj_count} StoredObject(s) " + f"for {doc_count} Document(s)" + ) + ) + admin.site.register(Document, DocumentAdmin) class DocHistoryAdmin(admin.ModelAdmin): @@ -232,11 +255,24 @@ class StoredObjectAdmin(admin.ModelAdmin): ] search_fields = ['name', 'doc_name', 'doc_rev'] list_display_links = ['name'] + actions = ["replicate_stored_object"] @admin.display(boolean=True, description="Deleted?", ordering="deleted") def is_deleted(self, instance): return instance.deleted is not None - + + @admin.action(description="Replicate related blobs") + def replicate_stored_object(self, request, queryset: QuerySet[StoredObject]): + stored_obj_count = 0 + for stored_object in queryset.all(): + if isinstance(stored_object, StoredObject): + force_replication(kind=stored_object.store, name=stored_object.name) + stored_obj_count += 1 + self.message_user( + request, + f"Queued replication of a total of {stored_obj_count} StoredObject(s)", + ) + admin.site.register(StoredObject, StoredObjectAdmin) diff --git a/ietf/doc/feeds.py b/ietf/doc/feeds.py index afe96cf0df..0269906fcf 100644 --- a/ietf/doc/feeds.py +++ b/ietf/doc/feeds.py @@ -5,6 +5,7 @@ import datetime import unicodedata +from django.conf import settings from django.contrib.syndication.views import Feed, FeedDoesNotExist from django.utils.feedgenerator import Atom1Feed, Rss201rev2Feed from 
django.urls import reverse as urlreverse @@ -223,7 +224,7 @@ def item_extra_kwargs(self, item): extra.update({"dcterms_accessRights": "gratis"}) extra.update({"dcterms_format": "text/html"}) media_contents = [] - if item.rfc_number < 8650: + if item.rfc_number < settings.FIRST_V3_RFC: if item.rfc_number not in [8, 9, 51, 418, 500, 530, 589]: for fmt, media_type in [("txt", "text/plain"), ("html", "text/html")]: media_contents.append( diff --git a/ietf/doc/models.py b/ietf/doc/models.py index 972f0a34e8..cc79b73831 100644 --- a/ietf/doc/models.py +++ b/ietf/doc/models.py @@ -52,6 +52,7 @@ from ietf.person.utils import get_active_balloters from ietf.utils import log from ietf.utils.decorators import memoize +from ietf.utils.text import decode_document_content from ietf.utils.validators import validate_no_control_chars from ietf.utils.mail import formataddr from ietf.utils.models import ForeignKey @@ -640,19 +641,7 @@ def text(self, size = -1): except IOError as e: log.log(f"Error reading text for {path}: {e}") return None - text = None - try: - text = raw.decode('utf-8') - except UnicodeDecodeError: - for back in range(1,4): - try: - text = raw[:-back].decode('utf-8') - break - except UnicodeDecodeError: - pass - if text is None: - text = raw.decode('latin-1') - return text + return decode_document_content(raw) def text_or_error(self): return self.text() or "Error; cannot read '%s'"%self.get_base_name() diff --git a/ietf/doc/storage.py b/ietf/doc/storage.py index 375620ccaf..ee1e76c4fa 100644 --- a/ietf/doc/storage.py +++ b/ietf/doc/storage.py @@ -114,7 +114,6 @@ def _get_write_parameters(self, name, content=None): class StoredObjectBlobdbStorage(BlobdbStorage): - ietf_log_blob_timing = True warn_if_missing = True # TODO-BLOBSTORE make this configurable (or remove it) def _save_stored_object(self, name, content) -> StoredObject: diff --git a/ietf/doc/storage_utils.py b/ietf/doc/storage_utils.py index 81588c83ec..c7cc6989cd 100644 --- a/ietf/doc/storage_utils.py +++ b/ietf/doc/storage_utils.py @@ -10,6 +10,7 @@ from django.core.files.storage import storages, Storage from ietf.utils.log import log +from ietf.utils.text import decode_document_content class StorageUtilsError(Exception): @@ -164,34 +165,39 @@ def store_str( def retrieve_bytes(kind: str, name: str) -> bytes: from ietf.doc.storage import maybe_log_timing - content = b"" - if settings.ENABLE_BLOBSTORAGE: - try: - store = _get_storage(kind) - with store.open(name) as f: - with maybe_log_timing( - hasattr(store, "ietf_log_blob_timing") and store.ietf_log_blob_timing, - "read", - bucket_name=store.bucket_name if hasattr(store, "bucket_name") else "", - name=name, - ): - content = f.read() - except Exception as err: - log(f"Blobstore Error: Failed to read bytes from {kind}:{name}: {repr(err)}") - if settings.SERVER_MODE == "development": - raise + if not settings.ENABLE_BLOBSTORAGE: + return b"" + try: + store = _get_storage(kind) + with store.open(name) as f: + with maybe_log_timing( + hasattr(store, "ietf_log_blob_timing") and store.ietf_log_blob_timing, + "read", + bucket_name=store.bucket_name if hasattr(store, "bucket_name") else "", + name=name, + ): + content = f.read() + except Exception as err: + log(f"Blobstore Error: Failed to read bytes from {kind}:{name}: {repr(err)}") + raise return content def retrieve_str(kind: str, name: str) -> str: - content = "" - if settings.ENABLE_BLOBSTORAGE: - try: - content_bytes = retrieve_bytes(kind, name) - # TODO-BLOBSTORE: try to decode all the different ways doc.text() does - content = 
content_bytes.decode("utf-8") - except Exception as err: - log(f"Blobstore Error: Failed to read string from {kind}:{name}: {repr(err)}") - if settings.SERVER_MODE == "development": - raise + if not settings.ENABLE_BLOBSTORAGE: + return "" + try: + content = decode_document_content(retrieve_bytes(kind, name)) + except Exception as err: + log(f"Blobstore Error: Failed to read string from {kind}:{name}: {repr(err)}") + raise return content + + +def force_replication(kind: str, name: str): + if not settings.ENABLE_BLOBSTORAGE: + return + storage = _get_storage(kind) + from ietf.blobdb.storage import BlobdbStorage + if isinstance(storage, BlobdbStorage): + storage.force_replication(name) diff --git a/ietf/doc/tasks.py b/ietf/doc/tasks.py index 19edb39014..273242e35f 100644 --- a/ietf/doc/tasks.py +++ b/ietf/doc/tasks.py @@ -209,3 +209,14 @@ def update_rfc_searchindex_task(self, rfc_number: int): countdown=searchindex_settings["TASK_RETRY_DELAY"], max_retries=searchindex_settings["TASK_MAX_RETRIES"], ) + + +@shared_task +def rebuild_searchindex_task(*, batchsize=40, drop_collection=False): + if drop_collection: + searchindex.delete_collection() + searchindex.create_collection() + searchindex.update_or_create_rfc_entries( + Document.objects.filter(type_id="rfc").order_by("-rfc_number"), + batchsize=batchsize, + ) diff --git a/ietf/doc/tests_notprepped.py b/ietf/doc/tests_notprepped.py new file mode 100644 index 0000000000..f417aa7931 --- /dev/null +++ b/ietf/doc/tests_notprepped.py @@ -0,0 +1,122 @@ +# Copyright The IETF Trust 2026, All Rights Reserved + +from django.conf import settings +from django.utils import timezone +from django.urls import reverse as urlreverse + +from pyquery import PyQuery + +from ietf.doc.factories import WgRfcFactory +from ietf.doc.models import StoredObject +from ietf.doc.storage_utils import store_bytes +from ietf.utils.test_utils import TestCase + + +class NotpreppedRfcXmlTests(TestCase): + def test_editor_source_button_visibility(self): + pre_v3 = WgRfcFactory(rfc_number=settings.FIRST_V3_RFC - 1) + first_v3 = WgRfcFactory(rfc_number=settings.FIRST_V3_RFC) + post_v3 = WgRfcFactory(rfc_number=settings.FIRST_V3_RFC + 1) + + for rfc, expect_button in [(pre_v3, False), (first_v3, True), (post_v3, True)]: + r = self.client.get( + urlreverse( + "ietf.doc.views_doc.document_main", kwargs=dict(name=rfc.name) + ) + ) + self.assertEqual(r.status_code, 200) + buttons = PyQuery(r.content)('a.btn:contains("Get editor source")') + if expect_button: + self.assertEqual(len(buttons), 1, msg=f"rfc_number={rfc.rfc_number}") + expected_href = urlreverse( + "ietf.doc.views_doc.rfcxml_notprepped_wrapper", + kwargs=dict(number=rfc.rfc_number), + ) + self.assertEqual( + buttons.attr("href"), + expected_href, + msg=f"rfc_number={rfc.rfc_number}", + ) + else: + self.assertEqual(len(buttons), 0, msg=f"rfc_number={rfc.rfc_number}") + + def test_rfcxml_notprepped(self): + number = settings.FIRST_V3_RFC + stored_name = f"notprepped/rfc{number}.notprepped.xml" + url = f"/doc/rfc{number}/notprepped/" + + # 404 for pre-v3 RFC numbers (no document needed) + r = self.client.get(f"/doc/rfc{number - 1}/notprepped/") + self.assertEqual(r.status_code, 404) + + # 404 when no RFC document exists in the database + r = self.client.get(url) + self.assertEqual(r.status_code, 404) + + # 404 when RFC document exists but has no StoredObject + WgRfcFactory(rfc_number=number) + r = self.client.get(url) + self.assertEqual(r.status_code, 404) + + # 404 when StoredObject exists but backing storage is missing 
(FileNotFoundError) + now = timezone.now() + StoredObject.objects.create( + store="rfc", + name=stored_name, + sha384="a" * 96, + len=0, + store_created=now, + created=now, + modified=now, + ) + r = self.client.get(url) + self.assertEqual(r.status_code, 404) + + # 200 with correct content-type, attachment disposition, and body when object is fully stored + xml_content = b"test" + store_bytes("rfc", stored_name, xml_content, allow_overwrite=True) + r = self.client.get(url) + self.assertEqual(r.status_code, 200) + self.assertEqual(r["Content-Type"], "application/xml") + self.assertEqual( + r["Content-Disposition"], + f'attachment; filename="rfc{number}.notprepped.xml"', + ) + self.assertEqual(b"".join(r.streaming_content), xml_content) + + def test_rfcxml_notprepped_wrapper(self): + number = settings.FIRST_V3_RFC + + # 404 for pre-v3 RFC numbers (no document needed) + r = self.client.get( + urlreverse( + "ietf.doc.views_doc.rfcxml_notprepped_wrapper", + kwargs=dict(number=number - 1), + ) + ) + self.assertEqual(r.status_code, 404) + + # 404 when no RFC document exists in the database + r = self.client.get( + urlreverse( + "ietf.doc.views_doc.rfcxml_notprepped_wrapper", + kwargs=dict(number=number), + ) + ) + self.assertEqual(r.status_code, 404) + + # 200 with rendered template when RFC document exists + rfc = WgRfcFactory(rfc_number=number) + r = self.client.get( + urlreverse( + "ietf.doc.views_doc.rfcxml_notprepped_wrapper", + kwargs=dict(number=number), + ) + ) + self.assertEqual(r.status_code, 200) + q = PyQuery(r.content) + self.assertIn(str(rfc.rfc_number), q("h1").text()) + download_url = urlreverse( + "ietf.doc.views_doc.rfcxml_notprepped", kwargs=dict(number=number) + ) + self.assertEqual(len(q(f'a.btn[href="{download_url}"]')), 1) diff --git a/ietf/doc/tests_tasks.py b/ietf/doc/tests_tasks.py index 728d21f131..2e2d65463f 100644 --- a/ietf/doc/tests_tasks.py +++ b/ietf/doc/tests_tasks.py @@ -24,6 +24,7 @@ generate_idnits2_rfc_status_task, investigate_fragment_task, notify_expirations_task, + rebuild_searchindex_task, update_rfc_searchindex_task, ) @@ -144,6 +145,48 @@ def test_update_rfc_searchindex_task( with self.assertRaises(Retry): update_rfc_searchindex_task(rfc_number=rfc.rfc_number) + @mock.patch("ietf.doc.tasks.searchindex.update_or_create_rfc_entries") + @mock.patch("ietf.doc.tasks.searchindex.create_collection") + @mock.patch("ietf.doc.tasks.searchindex.delete_collection") + def test_rebuild_searchindex_task(self, mock_delete, mock_create, mock_update): + rfcs = WgRfcFactory.create_batch(10) + rebuild_searchindex_task() + self.assertFalse(mock_delete.called) + self.assertFalse(mock_create.called) + self.assertTrue(mock_update.called) + self.assertQuerysetEqual( + mock_update.call_args.args[0], + sorted(rfcs, key=lambda doc: -doc.rfc_number), + ordered=True, + ) + + mock_delete.reset_mock() + mock_create.reset_mock() + mock_update.reset_mock() + rebuild_searchindex_task(drop_collection=True) + self.assertTrue(mock_delete.called) + self.assertTrue(mock_create.called) + self.assertTrue(mock_update.called) + self.assertQuerysetEqual( + mock_update.call_args.args[0], + sorted(rfcs, key=lambda doc: -doc.rfc_number), + ordered=True, + ) + + mock_delete.reset_mock() + mock_create.reset_mock() + mock_update.reset_mock() + rebuild_searchindex_task(drop_collection=True, batchsize=3) + self.assertTrue(mock_delete.called) + self.assertTrue(mock_create.called) + self.assertTrue(mock_update.called) + self.assertQuerysetEqual( + mock_update.call_args.args[0], + sorted(rfcs, key=lambda doc: 
-doc.rfc_number), + ordered=True, + ) + self.assertEqual(mock_update.call_args.kwargs["batchsize"], 3) + class Idnits2SupportTests(TestCase): settings_temp_path_overrides = TestCase.settings_temp_path_overrides + [ diff --git a/ietf/doc/urls.py b/ietf/doc/urls.py index 61e94b2231..0c13503b78 100644 --- a/ietf/doc/urls.py +++ b/ietf/doc/urls.py @@ -99,6 +99,8 @@ url(r'^%(name)s(?:/%(rev)s)?/$' % settings.URL_REGEXPS, views_doc.document_main), url(r'^%(name)s(?:/%(rev)s)?/bibtex/$' % settings.URL_REGEXPS, views_doc.document_bibtex), + url(r'^rfc(?P<number>[0-9]+)/notprepped/$', views_doc.rfcxml_notprepped), + url(r'^rfc(?P<number>[0-9]+)/notprepped-wrapper/$', views_doc.rfcxml_notprepped_wrapper), url(r'^%(name)s(?:/%(rev)s)?/idnits2-state/$' % settings.URL_REGEXPS, views_doc.idnits2_state), url(r'^bibxml3/reference.I-D.%(name)s(?:-%(rev)s)?.xml$' % settings.URL_REGEXPS, views_doc.document_bibxml_ref), url(r'^bibxml3/%(name)s(?:-%(rev)s)?.xml$' % settings.URL_REGEXPS, views_doc.document_bibxml), diff --git a/ietf/doc/utils.py b/ietf/doc/utils.py index 8cbe5e8f3e..6f32ed454f 100644 --- a/ietf/doc/utils.py +++ b/ietf/doc/utils.py @@ -39,12 +39,15 @@ DocHistoryAuthor, Document, DocumentAuthor, + EditedRfcAuthorsDocEvent, RfcAuthor, - State, EditedRfcAuthorsDocEvent, + State, + StoredObject, ) from ietf.doc.models import RelatedDocument, RelatedDocHistory, BallotType, DocReminder from ietf.doc.models import DocEvent, ConsensusDocEvent, BallotDocEvent, IRSGBallotDocEvent, NewRevisionDocEvent, StateDocEvent from ietf.doc.models import TelechatDocEvent, DocumentActionHolder, EditedAuthorsDocEvent, BallotPositionDocEvent +from ietf.doc.storage_utils import force_replication from ietf.name.models import DocReminderTypeName, DocRelationshipName from ietf.group.models import Role, Group, GroupFeatures from ietf.ietfauth.utils import has_role, is_authorized_in_doc_stream, is_individual_draft_author, is_bofreq_editor @@ -1713,3 +1716,23 @@ def update_or_create_draft_bibxml_file(doc, rev): def ensure_draft_bibxml_path_exists(): (Path(settings.BIBXML_BASE_PATH) / "bibxml-ids").mkdir(exist_ok=True) + + +def replicate_stored_objects_for_document(doc: Document) -> int: + """Sync all StoredObjects associated with doc to the replica blob store + + Returns count of StoredObjects queued for replication (which may or may not + be replicated, depending on whether replication is enabled / the storages are + actually BlobdbStorage instances, etc). + """ + # n.b., StoredObjects have a nullable doc_rev field, but Documents do not. + # Until / unless we straighten that out, treat "" and None equivalently when + # matching rev. + qs_matching_rev = StoredObject.objects.filter(doc_rev=doc.rev) + if doc.rev == "": + qs_matching_rev |= StoredObject.objects.filter(doc_rev__isnull=True) + count = 0 + for stored_object in qs_matching_rev.filter(doc_name=doc.name): + force_replication(kind=stored_object.store, name=stored_object.name) + count += 1 + return count diff --git a/ietf/doc/views_doc.py b/ietf/doc/views_doc.py index c1f6352ac3..5b57a62074 100644 --- a/ietf/doc/views_doc.py +++ b/ietf/doc/views_doc.py @@ -1,4 +1,4 @@ -# Copyright The IETF Trust 2009-2024, All Rights Reserved +# Copyright The IETF Trust 2009-2026, All Rights Reserved # -*- coding: utf-8 -*- # # Parts Copyright (C) 2009-2010 Nokia Corporation and/or its subsidiary(-ies). 
@@ -43,9 +43,10 @@ from celery.result import AsyncResult from django.core.cache import caches +from django.core.files.base import ContentFile from django.core.exceptions import PermissionDenied from django.db.models import Max -from django.http import HttpResponse, Http404, HttpResponseBadRequest, JsonResponse +from django.http import FileResponse, HttpResponse, Http404, HttpResponseBadRequest, JsonResponse from django.shortcuts import render, get_object_or_404, redirect from django.template.loader import render_to_string from django.urls import reverse as urlreverse @@ -57,7 +58,7 @@ import debug # pyflakes:ignore from ietf.doc.models import ( Document, DocHistory, DocEvent, BallotDocEvent, BallotType, - ConsensusDocEvent, NewRevisionDocEvent, TelechatDocEvent, WriteupDocEvent, IanaExpertDocEvent, + ConsensusDocEvent, NewRevisionDocEvent, StoredObject, TelechatDocEvent, WriteupDocEvent, IanaExpertDocEvent, IESG_BALLOT_ACTIVE_STATES, STATUSCHANGE_RELATIONS, DocumentActionHolder, DocumentAuthor, RelatedDocument, RelatedDocHistory) from ietf.doc.tasks import investigate_fragment_task @@ -86,6 +87,7 @@ from ietf.review.models import ReviewAssignment from ietf.review.utils import can_request_review_of_doc, review_assignments_to_list_for_docs, review_requests_to_list_for_docs from ietf.review.utils import no_review_from_teams_on_doc +from ietf.doc.storage_utils import retrieve_bytes from ietf.utils import markup_txt, log, markdown from ietf.utils.draft import get_status_from_draft_text from ietf.utils.meetecho import MeetechoAPIError, SlidesManager @@ -2356,3 +2358,29 @@ def investigate(request): "results": results, }, ) + +def rfcxml_notprepped(request, number): + number = int(number) + if number < settings.FIRST_V3_RFC: + raise Http404 + rfc = Document.objects.filter(type="rfc", rfc_number=number).first() + if rfc is None: + raise Http404 + name = f"notprepped/rfc{number}.notprepped.xml" + if not StoredObject.objects.filter(name=name).exists(): + raise Http404 + try: + bytes = retrieve_bytes("rfc", name) + except FileNotFoundError: + raise Http404 + return FileResponse(ContentFile(bytes, name=f"rfc{number}.notprepped.xml"), as_attachment=True) + + +def rfcxml_notprepped_wrapper(request, number): + number = int(number) + if number < settings.FIRST_V3_RFC: + raise Http404 + rfc = Document.objects.filter(type="rfc", rfc_number=number).first() + if rfc is None: + raise Http404 + return render(request, "doc/notprepped_wrapper.html", context={"rfc": rfc}) diff --git a/ietf/group/serializers.py b/ietf/group/serializers.py index db3b37af48..e789ba46bf 100644 --- a/ietf/group/serializers.py +++ b/ietf/group/serializers.py @@ -20,8 +20,14 @@ class AreaDirectorSerializer(serializers.Serializer): Works with Email or Role """ + name = serializers.SerializerMethodField() email = serializers.SerializerMethodField() + @extend_schema_field(serializers.CharField) + def get_name(self, instance: Email | Role): + person = getattr(instance, 'person', None) + return person.plain_name() if person else None + @extend_schema_field(serializers.EmailField) def get_email(self, instance: Email | Role): if isinstance(instance, Role): diff --git a/ietf/group/tests_review.py b/ietf/group/tests_review.py index 89c755bb26..bb9b79a416 100644 --- a/ietf/group/tests_review.py +++ b/ietf/group/tests_review.py @@ -888,10 +888,10 @@ def test_requests_history_filter_page(self): self.assertEqual(r.status_code, 200) self.assertContains(r, review_req.doc.name) self.assertContains(r, review_req2.doc.name) - self.assertContains(r, 
'Assigned') - self.assertContains(r, 'Accepted') - self.assertContains(r, 'Completed') - self.assertContains(r, 'Ready') + self.assertContains(r, 'data-text="Assigned"') + self.assertContains(r, 'data-text="Accepted"') + self.assertContains(r, 'data-text="Completed"') + self.assertContains(r, 'data-text="Ready"') self.assertContains(r, escape(assignment.reviewer.person.name)) self.assertContains(r, escape(assignment2.reviewer.person.name)) @@ -907,10 +907,10 @@ def test_requests_history_filter_page(self): self.assertEqual(r.status_code, 200) self.assertContains(r, review_req.doc.name) self.assertNotContains(r, review_req2.doc.name) - self.assertContains(r, 'Assigned') - self.assertNotContains(r, 'Accepted') - self.assertNotContains(r, 'Completed') - self.assertNotContains(r, 'Ready') + self.assertContains(r, 'data-text="Assigned"') + self.assertNotContains(r, 'data-text="Accepted"') + self.assertNotContains(r, 'data-text="Completed"') + self.assertNotContains(r, 'data-text="Ready"') self.assertContains(r, escape(assignment.reviewer.person.name)) self.assertNotContains(r, escape(assignment2.reviewer.person.name)) @@ -926,10 +926,10 @@ def test_requests_history_filter_page(self): self.assertEqual(r.status_code, 200) self.assertNotContains(r, review_req.doc.name) self.assertContains(r, review_req2.doc.name) - self.assertNotContains(r, 'Assigned') - self.assertContains(r, 'Accepted') - self.assertContains(r, 'Completed') - self.assertContains(r, 'Ready') + self.assertNotContains(r, 'data-text="Assigned"') + self.assertContains(r, 'data-text="Accepted"') + self.assertContains(r, 'data-text="Completed"') + self.assertContains(r, 'data-text="Ready"') self.assertNotContains(r, escape(assignment.reviewer.person.name)) self.assertContains(r, escape(assignment2.reviewer.person.name)) @@ -940,9 +940,9 @@ def test_requests_history_filter_page(self): r = self.client.get(url) self.assertEqual(r.status_code, 200) self.assertNotContains(r, review_req.doc.name) - self.assertNotContains(r, 'Assigned') - self.assertNotContains(r, 'Accepted') - self.assertNotContains(r, 'Completed') + self.assertNotContains(r, 'data-text="Assigned"') + self.assertNotContains(r, 'data-text="Accepted"') + self.assertNotContains(r, 'data-text="Completed"') def test_requests_history_invalid_filter_parameters(self): # First assignment as assigned diff --git a/ietf/group/tests_serializers.py b/ietf/group/tests_serializers.py index bf29e6c8fd..b584a17ae2 100644 --- a/ietf/group/tests_serializers.py +++ b/ietf/group/tests_serializers.py @@ -31,7 +31,7 @@ def test_serializes_role(self): serialized = AreaDirectorSerializer(role).data self.assertEqual( serialized, - {"email": role.email.email_address()}, + {"email": role.email.email_address(), "name": role.person.plain_name()}, ) def test_serializes_email(self): @@ -40,7 +40,10 @@ def test_serializes_email(self): serialized = AreaDirectorSerializer(email).data self.assertEqual( serialized, - {"email": email.email_address()}, + { + "email": email.email_address(), + "name": email.person.plain_name() if email.person else None, + }, ) @@ -63,7 +66,10 @@ def test_serializes_active_area(self): self.assertEqual(serialized["name"], area.name) self.assertCountEqual( serialized["ads"], - [{"email": ad.email.email_address()} for ad in ad_roles], + [ + {"email": ad.email.email_address(), "name": ad.person.plain_name()} + for ad in ad_roles + ], ) def test_serializes_inactive_area(self): diff --git a/ietf/meeting/tests_session_requests.py b/ietf/meeting/tests_session_requests.py index 
0cb092d2f8..42dbee5f23 100644 --- a/ietf/meeting/tests_session_requests.py +++ b/ietf/meeting/tests_session_requests.py @@ -236,7 +236,7 @@ def test_edit(self): self.assertRedirects(r, redirect_url) # Check whether updates were stored in the database - sessions = Session.objects.filter(meeting=meeting, group=mars) + sessions = Session.objects.filter(meeting=meeting, group=mars).order_by("id") self.assertEqual(len(sessions), 2) session = sessions[0] self.assertFalse(session.constraints().filter(name='time_relation')) diff --git a/ietf/meeting/tests_views.py b/ietf/meeting/tests_views.py index 258ffe554c..17988e50be 100644 --- a/ietf/meeting/tests_views.py +++ b/ietf/meeting/tests_views.py @@ -33,6 +33,7 @@ from django.http import QueryDict, FileResponse from django.template import Context, Template from django.utils import timezone +from django.utils.html import escape from django.utils.safestring import mark_safe from django.utils.text import slugify @@ -9491,7 +9492,7 @@ def test_session_attendance(self): self.assertEqual(r.status_code, 200) self.assertContains(r, '3 attendees') for person in persons: - self.assertContains(r, person.plain_name()) + self.assertContains(r, escape(person.plain_name())) # Test for the "I was there" button. def _test_button(person, expected): @@ -9511,14 +9512,14 @@ def _test_button(person, expected): # attempt to POST anyway is ignored r = self.client.post(attendance_url) self.assertEqual(r.status_code, 200) - self.assertNotContains(r, persons[3].plain_name()) + self.assertNotContains(r, escape(persons[3].plain_name())) self.assertEqual(session.attended_set.count(), 3) # button is shown, and POST is accepted meeting.importantdate_set.update(name_id='revsub',date=date_today() + datetime.timedelta(days=20)) _test_button(persons[3], True) r = self.client.post(attendance_url) self.assertEqual(r.status_code, 200) - self.assertContains(r, persons[3].plain_name()) + self.assertContains(r, escape(persons[3].plain_name())) self.assertEqual(session.attended_set.count(), 4) # When the meeting is finalized, a bluesheet file is generated, diff --git a/ietf/meeting/utils.py b/ietf/meeting/utils.py index bdf3d3d3d3..10ae0d3667 100644 --- a/ietf/meeting/utils.py +++ b/ietf/meeting/utils.py @@ -1025,9 +1025,18 @@ def resolve_materials_for_one_meeting(meeting: Meeting): ) def resolve_uploaded_material(meeting: Meeting, doc: Document): - resolved = [] + resolved: list[ResolvedMaterial] = [] + remove = ResolvedMaterial.objects.none() blob = resolve_one_material(doc, rev=None, ext=None) - if blob is not None: + if blob is None: + # Versionless file does not exist. Remove the versionless ResolvedMaterial + # if it existed. This is to avoid leaving behind a stale link to a replaced + # version. This comes up e.g. if a ProceedingsMaterial is changed from having + # an uploaded file to being an external URL. 
+ remove = ResolvedMaterial.objects.filter( + name=doc.name, meeting_number=meeting.number + ) + else: resolved.append( ResolvedMaterial( name=doc.name, @@ -1047,12 +1056,15 @@ def resolve_uploaded_material(meeting: Meeting, doc: Document): blob=blob.name, ) ) + # Create the new record(s) ResolvedMaterial.objects.bulk_create( resolved, update_conflicts=True, unique_fields=["name", "meeting_number"], update_fields=["bucket", "blob"], ) + # and remove one if necessary (will be a none() queryset if not) + remove.delete() def store_blob_for_one_material_file(doc: Document, rev: str, filepath: Path): diff --git a/ietf/meeting/views_proceedings.py b/ietf/meeting/views_proceedings.py index d1169bff2d..639efa1da4 100644 --- a/ietf/meeting/views_proceedings.py +++ b/ietf/meeting/views_proceedings.py @@ -14,7 +14,7 @@ from ietf.meeting.models import Meeting, MeetingHost from ietf.meeting.helpers import get_meeting from ietf.name.models import ProceedingsMaterialTypeName -from ietf.meeting.utils import handle_upload_file +from ietf.meeting.utils import handle_upload_file, resolve_uploaded_material from ietf.utils.text import xslugify class UploadProceedingsMaterialForm(FileUploadForm): @@ -150,7 +150,7 @@ def save_proceedings_material_doc(meeting, material_type, title, request, file=N if events: doc.save_with_history(events) - + resolve_uploaded_material(meeting, doc) return doc diff --git a/ietf/settings.py b/ietf/settings.py index 40a4cb5c56..50e069ff1a 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -231,11 +231,14 @@ AGENDA_CACHE_TIMEOUT_DEFAULT = 8 * 24 * 60 * 60 # 8 days AGENDA_CACHE_TIMEOUT_CURRENT_MEETING = 6 * 60 # 6 minutes + WSGI_APPLICATION = "ietf.wsgi.application" AUTHENTICATION_BACKENDS = ( 'ietf.ietfauth.backends.CaseInsensitiveModelBackend', ) -FILE_UPLOAD_PERMISSIONS = 0o644 +FILE_UPLOAD_PERMISSIONS = 0o644 + +FIRST_V3_RFC = 8650 # @@ -1268,7 +1271,7 @@ def skip_unreadable_post(record): except ImportError: pass -STATS_NAMES_LIMIT = 25 +STATS_TIMELINE_CACHE_TIMEOUT = 86400 UTILS_MEETING_CONFERENCE_DOMAINS = ['webex.com', 'zoom.us', 'jitsi.org', 'meetecho.com', 'gather.town', ] UTILS_TEST_RANDOM_STATE_FILE = '.factoryboy_random_state' diff --git a/ietf/static/css/ietf.scss b/ietf/static/css/ietf.scss index df973863d5..6695c57b13 100644 --- a/ietf/static/css/ietf.scss +++ b/ietf/static/css/ietf.scss @@ -1216,3 +1216,20 @@ iframe.status { .overflow-shadows--bottom-only { box-shadow: inset 0px -21px 18px -20px var(--bs-body-color); } + +#navbar-doc-search-wrapper { + position: relative; +} + +#navbar-doc-search-results { + max-height: 400px; + overflow-y: auto; + min-width: auto; + left: 0; + right: 0; + + .dropdown-item { + white-space: normal; + overflow-wrap: break-word; + } +} diff --git a/ietf/static/js/meeting_stats.js b/ietf/static/js/meeting_stats.js new file mode 100644 index 0000000000..70b18a0f03 --- /dev/null +++ b/ietf/static/js/meeting_stats.js @@ -0,0 +1,57 @@ +// Copyright The IETF Trust 2026, All Rights Reserved +document.addEventListener('DOMContentLoaded', () => { + // Need to use autocolors plug-in else all slices are gray... 
+ const autocolors = window['chartjs-plugin-autocolors'] + Chart.register(autocolors) + // ── Safely parse JSON data injected from Django view ── + const totalChartData = JSON.parse(document.getElementById('total-chart-data').textContent) + const inPersonChartData = JSON.parse(document.getElementById('in-person-chart-data').textContent) + + function displayChart (id, data) { + const ctx = document.getElementById(id).getContext('2d') + new Chart(ctx, { + type: 'pie', // Change to 'doughnut' for a donut chart + data: data, + options: { + responsive: true, + plugins: { + autocolors: { + mode: 'data' // Required for Pie charts to color individual slices + }, + legend: { + position: 'bottom', + labels: { + padding: 20, + font: { size: 13 }, + color: '#475569', + generateLabels: function (chart) { + const dataset = chart.data.datasets[0] + return chart.data.labels.map((label, i) => ({ + text: `${label}: ${dataset.data[i]}`, + fillStyle: dataset.backgroundColor[i], + hidden: false, + index: i, + })) + } + } + }, + tooltip: { + callbacks: { + label: function (context) { + const label = context.label || '' + const value = context.raw + const total = context.dataset.data.reduce((a, b) => a + b, 0) + const percentage = ((value / total) * 100).toFixed(1) + + return `${label}: ${value} (${percentage}%)` + } + } + } + } + } + }) + } + + displayChart('totalRegistrationChart', totalChartData) + displayChart('inPersonRegistrationChart', inPersonChartData) +}) diff --git a/ietf/static/js/meeting_timeline.js b/ietf/static/js/meeting_timeline.js new file mode 100644 index 0000000000..161cead0ec --- /dev/null +++ b/ietf/static/js/meeting_timeline.js @@ -0,0 +1,84 @@ +// Copyright The IETF Trust 2026, All Rights Reserved +document.addEventListener('DOMContentLoaded', () => { + // ── Safely parse JSON data injected from Django view ── + const totalChartData = JSON.parse(document.getElementById('total-chart-data').textContent) + const inPersonChartData = JSON.parse(document.getElementById('in-person-chart-data').textContent) + const statsType = JSON.parse(document.getElementById('stats-type-data').textContent) + const stackedLines = statsType === 'total' + + function displayChart (id, data) { + const ctx = document.getElementById(id).getContext('2d') + return new Chart(ctx, { + type: 'line', + data: data, + options: { + responsive: true, + scales: { + y: { + stacked: stackedLines, + }, + x: { + title: { + display: true, + text: 'IETF Meeting Number', + }, + }, + }, + plugins: { + legend: { + position: 'bottom', + labels: { + usePointStyle: true, + padding: 15, + font: { size: 12 }, + }, + }, + tooltip: { + backgroundColor: 'rgba(0,0,0,0.8)', + titleFont: { size: 14 }, + bodyFont: { size: 13 }, + callbacks: { + title: function (items) { + return `IETF Meeting ${items[0].label}` + }, + label: function (context) { + return ` ${context.dataset.label}: ${context.parsed.y} participants` + } + } + }, + zoom: { + zoom: { + wheel: { enabled: true }, // scroll to zoom + pinch: { enabled: true }, // pinch on mobile + drag: { enabled: true }, // drag to select range + mode: 'xy', // zoom X-axis and Y-axis + }, + pan: { + enabled: true, + mode: 'xy', // pan X-axis and Y-axis + }, + }, + } + } + }) + } + + let inPersonChart = null + const totalChart = displayChart('totalRegistrationChart', totalChartData) + if (inPersonChartData !== null) { + inPersonChart = displayChart('inPersonRegistrationChart', inPersonChartData) + } + document.addEventListener('keydown', (event) => { + if (event.key === 'Escape') { 
totalChart.resetZoom() + if (inPersonChart !== null) { + inPersonChart.resetZoom() + } + } + }) + document.getElementById('resetButton').addEventListener('click', () => { + totalChart.resetZoom() + if (inPersonChart !== null) { + inPersonChart.resetZoom() + } + }) +}) diff --git a/ietf/static/js/navbar-doc-search.js b/ietf/static/js/navbar-doc-search.js new file mode 100644 index 0000000000..c36c032310 --- /dev/null +++ b/ietf/static/js/navbar-doc-search.js @@ -0,0 +1,113 @@ +$(function () { + var $input = $('#navbar-doc-search'); + var $results = $('#navbar-doc-search-results'); + var ajaxUrl = $input.data('ajax-url'); + var debounceTimer = null; + var highlightedIndex = -1; + var keyboardHighlight = false; + var currentItems = []; + + function showDropdown() { + $results.addClass('show'); + } + + function hideDropdown() { + $results.removeClass('show'); + highlightedIndex = -1; + keyboardHighlight = false; + updateHighlight(); + } + + function updateHighlight() { + $results.find('.dropdown-item').removeClass('active'); + if (highlightedIndex >= 0 && highlightedIndex < currentItems.length) { + $results.find('.dropdown-item').eq(highlightedIndex).addClass('active'); + } + } + + function doSearch(query) { + if (query.length < 2) { + hideDropdown(); + return; + } + $.ajax({ + url: ajaxUrl, + dataType: 'json', + data: { q: query }, + success: function (data) { + currentItems = data; + highlightedIndex = -1; + $results.empty(); + if (data.length === 0) { + $results.append('
<li><span class="dropdown-item disabled">No results found</span></li>
'); + } else { + data.forEach(function (item) { + var $li = $('
<li>'); + var $a = $('<a class="dropdown-item" href="' + item.url + '">' + item.text + '</a>'); + $li.append($a); + $results.append($li); + }); + } + showDropdown(); + } + }); + } + + $input.on('input', function () { + clearTimeout(debounceTimer); + var query = $(this).val().trim(); + debounceTimer = setTimeout(function () { + doSearch(query); + }, 250); + }); + + $input.on('keydown', function (e) { + if (e.key === 'ArrowDown') { + e.preventDefault(); + if (highlightedIndex < currentItems.length - 1) { + highlightedIndex++; + keyboardHighlight = true; + updateHighlight(); + } + } else if (e.key === 'ArrowUp') { + e.preventDefault(); + if (highlightedIndex > 0) { + highlightedIndex--; + keyboardHighlight = true; + updateHighlight(); + } + } else if (e.key === 'Enter') { + e.preventDefault(); + if (keyboardHighlight && highlightedIndex >= 0 && highlightedIndex < currentItems.length) { + window.location.href = currentItems[highlightedIndex].url; + } else { + var query = $(this).val().trim(); + if (query) { + window.location.href = '/doc/search/?name=' + encodeURIComponent(query) + '&rfcs=on&activedrafts=on&olddrafts=on'; + } + } + } else if (e.key === 'Escape') { + hideDropdown(); + $input.blur(); + } + }); + + // Hover highlights (visual only — Enter still submits the text) + $results.on('mouseenter', '.dropdown-item', function () { + highlightedIndex = $results.find('.dropdown-item').index(this); + keyboardHighlight = false; + updateHighlight(); + }); + + $results.on('mouseleave', '.dropdown-item', function () { + highlightedIndex = -1; + updateHighlight(); + }); + + // Click outside closes dropdown + $(document).on('click', function (e) { + if (!$(e.target).closest('#navbar-doc-search-wrapper').length) { + hideDropdown(); + } + }); +}); diff --git a/ietf/stats/tests.py b/ietf/stats/tests.py index 48552c8fba..373f06e343 100644 --- a/ietf/stats/tests.py +++ b/ietf/stats/tests.py @@ -4,12 +4,14 @@ import calendar import json +import datetime from pyquery import PyQuery import debug # pyflakes:ignore from django.urls import reverse as urlreverse +from django.utils import timezone from ietf.utils.test_utils import login_testing_unauthorized, TestCase import ietf.stats.views @@ -18,24 +20,73 @@ from ietf.group.factories import RoleFactory from ietf.person.factories import PersonFactory from ietf.review.factories import ReviewRequestFactory, ReviewerSettingsFactory, ReviewAssignmentFactory +from ietf.meeting.tests_models import MeetingFactory, RegistrationFactory from ietf.utils.timezone import date_today class StatisticsTests(TestCase): def test_stats_index(self): + # Create a meeting as the index page needs to know the current meeting + MeetingFactory(type_id='ietf', number='124', date=timezone.now()) url = urlreverse(ietf.stats.views.stats_index) r = self.client.get(url) self.assertEqual(r.status_code, 200) def test_document_stats(self): - r = self.client.get(urlreverse("ietf.stats.views.document_stats")) - self.assertRedirects(r, urlreverse("ietf.stats.views.stats_index")) - + # Create a meeting as the index page needs to know the current meeting + MeetingFactory(type_id='ietf', number='124', date=timezone.now()) + r = self.client.get(urlreverse(ietf.stats.views.document_stats)) + self.assertRedirects(r, urlreverse(ietf.stats.views.stats_index)) def test_meeting_stats(self): - r = self.client.get(urlreverse("ietf.stats.views.meeting_stats")) - self.assertRedirects(r, urlreverse("ietf.stats.views.stats_index")) - + meeting124 = MeetingFactory(type_id='ietf', number='124', date=timezone.now()) + meeting125 = MeetingFactory(type_id='ietf', 
number='125', date=timezone.now() + datetime.timedelta(days=120)) + RegistrationFactory.create_batch(15, meeting=meeting124, with_ticket={'attendance_type_id': 'onsite'}, attended=True) + RegistrationFactory(meeting=meeting124, with_ticket={'attendance_type_id': 'onsite'}, attended=False) + RegistrationFactory.create_batch(14, meeting=meeting124, with_ticket={'attendance_type_id': 'remote'}, attended=True) + RegistrationFactory(meeting=meeting124, with_ticket={'attendance_type_id': 'remote'}, attended=False) + RegistrationFactory.create_batch(15, meeting=meeting125, affiliation='Test LLC', with_ticket={'attendance_type_id': 'remote'}, attended=False) + RegistrationFactory.create_batch(25, meeting=meeting125, affiliation='Example, Ltd', with_ticket={'attendance_type_id': 'onsite'}, attended=False) + # Test the meeting-specific statistics per affiliation and per country + r = self.client.get(urlreverse(ietf.stats.views.meeting_stats, kwargs={"meeting_number": "124", "stats_type": "affiliation"})) + self.assertEqual(r.status_code, 200) + self.assertContains(r, "Total Registrations by Affiliation (31 in total)") + self.assertContains(r, "In Person Registrations by Affiliation (16 in total)") + self.assertContains(r, "/stats/meeting/124/affiliation") + self.assertContains(r, "/stats/meeting/125/affiliation") + r = self.client.get(urlreverse(ietf.stats.views.meeting_stats, kwargs={"meeting_number": "124", "stats_type": "country"})) + self.assertEqual(r.status_code, 200) + self.assertContains(r, "Total Registrations by Country (31 in total)") + self.assertContains(r, "In Person Registrations by Country (16 in total)") + self.assertContains(r, "/stats/meeting/124/country") + self.assertContains(r, "/stats/meeting/125/country") + # Test the meetings timeline per country + r = self.client.get(urlreverse(ietf.stats.views.meetings_timeline, kwargs={"stats_type": "country"})) + self.assertEqual(r.status_code, 200) + self.assertContains(r, "/stats/meeting/124/country") + self.assertContains(r, "/stats/meeting/125/country") + self.assertContains(r, "This page provides a timeline of meeting registrations by country") + # Test the meetings timeline per affiliation + r = self.client.get(urlreverse(ietf.stats.views.meetings_timeline, kwargs={"stats_type": "affiliation"})) + self.assertEqual(r.status_code, 200) + self.assertContains(r, "/stats/meeting/124/affiliation") + self.assertContains(r, "/stats/meeting/125/affiliation") + self.assertContains(r, "This page provides a timeline of meeting registrations by affiliation") + # Extract the JSON embedded in the response + pq = PyQuery(r.content) + in_person_data = json.loads(pq.find("script#in-person-chart-data").text()) + self.assertTrue( + any( + ds["label"] == "Example" and ds["data"] == [0, 25] + for ds in in_person_data["datasets"] + ) + ) + # Test the global meetings timeline + r = self.client.get(urlreverse(ietf.stats.views.meetings_timeline, kwargs={"stats_type": "total"})) + self.assertEqual(r.status_code, 200) + self.assertContains(r, "/stats/meeting/124/country") + self.assertContains(r, "/stats/meeting/125/country") + self.assertContains(r, "This page provides a timeline of meeting registrations.") def test_known_country_list(self): # check redirect diff --git a/ietf/stats/urls.py b/ietf/stats/urls.py index d2993759d2..01b8758c84 100644 --- a/ietf/stats/urls.py +++ b/ietf/stats/urls.py @@ -11,7 +11,8 @@ url(r"^$", views.stats_index),
url(r"^document/(?:(?Pauthors|pages|words|format|formlang|author/(?:documents|affiliation|country|continent|citations|hindex)|yearly/(?:affiliation|country|continent))/)?$", views.document_stats), url(r"^knowncountries/$", views.known_countries_list), - url(r"^meeting/(?P\d+)/(?Pcountry|continent)/$", views.meeting_stats), - url(r"^meeting/(?:(?Poverview|country|continent)/)?$", views.meeting_stats), + url(r"^meeting/$", views.meetings_timeline), + url(r"^meeting/(?P\d+)/(?Paffiliation|country)/$", views.meeting_stats), + url(r"^meeting/(?:(?Paffiliation|country|total)/)?$", views.meetings_timeline), url(r"^review/(?:(?Pcompletion|results|states|time)/)?(?:%(acronym)s/)?$" % settings.URL_REGEXPS, views.review_stats), ] diff --git a/ietf/stats/views.py b/ietf/stats/views.py index 504d84e86d..d61b673075 100644 --- a/ietf/stats/views.py +++ b/ietf/stats/views.py @@ -9,11 +9,13 @@ import dateutil.relativedelta from collections import defaultdict +from django.conf import settings from django.contrib.auth.decorators import login_required +from django.core.cache import cache from django.http import HttpResponseRedirect from django.shortcuts import render from django.urls import reverse as urlreverse - +from django.db.models import Count import debug # pyflakes:ignore @@ -25,15 +27,32 @@ from ietf.group.models import Role, Group from ietf.person.models import Person from ietf.name.models import ReviewResultName, CountryName, ReviewAssignmentStateName +from ietf.meeting.models import Registration from ietf.ietfauth.utils import has_role from ietf.utils.response import permission_denied from ietf.utils.timezone import date_today, DEADLINE_TZINFO +from ietf.meeting.helpers import get_current_ietf_meeting_num, get_ietf_meeting +# Color palette for lines +colors = [ + '#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0', '#9966FF', + '#FF9F40', '#C9CBCF', '#7BC043', '#F37735', '#00ABA9', + '#2B5797', '#E81123', '#00A4EF', '#7FBA00', '#FFB900', + '#D83B01', '#B4009E', '#5C2D91', '#008575', '#E3008C', +] def stats_index(request): - return render(request, "stats/index.html") + """Render the statistics index page with the current meeting number as it is required by the meeting menu item.""" + current_meeting = get_current_ietf_meeting_num() + return render(request, "stats/index.html", { + "current_meeting": current_meeting + }) def generate_query_string(query_dict, overrides): + """ + Returns: + A query string starting with '?' if there are parameters, empty string otherwise. + """ query_part = "" if query_dict or overrides: @@ -58,9 +77,20 @@ def generate_query_string(query_dict, overrides): return query_part def get_choice(request, get_parameter, possible_choices, multiple=False): - # the statistics are built with links to make navigation faster, - # so we don't really have a form in most cases, so just use this - # helper instead to select between the choices + """Extract a choice from the request GET parameters. + + Since statistics pages use links for navigation instead of forms, + this helper selects between possible choices from the URL parameters. + + Args: + request: The HTTP request object. + get_parameter: The name of the GET parameter. + possible_choices: List of tuples (value, label). + multiple: If True, return a list of found values; otherwise return the first found or None. + + Returns: + The selected value(s) or None. 
+ """ values = request.GET.getlist(get_parameter) found = [t[0] for t in possible_choices if t[0] in values] @@ -73,75 +103,553 @@ def get_choice(request, get_parameter, possible_choices, multiple=False): return None def add_url_to_choices(choices, url_builder): + """Add URLs to a list of choices. + + Args: + choices: List of tuples (slug, label). + url_builder: Function that takes a slug and returns a URL. + + Returns: + List of tuples (slug, label, url). + """ return [ (slug, label, url_builder(slug)) for slug, label in choices] -def put_into_bin(value, bin_size): - if value is None: - return (0, '') +def document_stats(request, stats_type=None): + # timeline per year, or per specific year: streams, affiliation, rfc vs I-D + # could also be time between individual/WG I-D to rfc publication/IESG ballot + # DISCUSS resolution time + # Humm also split by authors (affiliation) / documents (the rest) probably + """Redirect to the stats index page. Deprecated view.""" + return HttpResponseRedirect(urlreverse("ietf.stats.views.stats_index")) - v = (value // bin_size) * bin_size - return (v, "{} - {}".format(v, v + bin_size - 1)) +def known_countries_list(request, stats_type=None, acronym=None): + """Render a list of known countries with their aliases.""" + countries = CountryName.objects.prefetch_related("countryalias_set") + for c in countries: + # the sorting is a bit of a hack - it puts the ISO code first + # since it was added in a migration + c.aliases = sorted(c.countryalias_set.all(), key=lambda a: a.pk) -def prune_unknown_bin_with_known(bins): - # remove from the unknown bin all authors within the - # named/known bins - all_known = { n for b, names in bins.items() if b for n in names } - bins[""] = [name for name in bins[""] if name not in all_known] - if not bins[""]: - del bins[""] + return render(request, "stats/known_countries_list.html", { + "countries": countries, + }) -def count_bins(bins): - return len({ n for b, names in bins.items() if b for n in names }) +def canonicalize_affiliation(affiliation): + """Canonicalize an affiliation string by removing common suffixes and standardizing prefixes. + + Args: + affiliation: The affiliation string to canonicalize. + + Returns: + The canonicalized affiliation string, or None if input is None. + """ + if not affiliation or affiliation.lower() in ('n/a', 'none', 'unspecified'): + return None + for suffix in ('ab', 'ag', 'corp', 'corp.', 'corporation', 'gmbh', 'inc.', 'inc', 'international pte ltd', 'llc', 'ltd', 'ltd.', 'private limited', 'pty ltd', 'pvt ltd'): + if affiliation.lower().endswith(', ' + suffix): + affiliation = affiliation[:-(len(suffix)+2)] + elif affiliation.lower().endswith(' ' + suffix): + affiliation = affiliation[:-(len(suffix)+1)] + elif affiliation.lower().endswith(',' + suffix): + affiliation = affiliation[:-(len(suffix)+1)] + for prefix in ('akamai','apple', 'cisco', 'futurewei', 'google', 'hitachi', 'hpe', 'huawei', 'juniper', 'meta', 'nokia', 'ntt', 'siemens'): + if affiliation.lower().startswith(prefix + ' '): + affiliation = prefix + return affiliation.title() + +def get_affiliation_data_for_meetings(attendance_type=None): + """Get affiliation participation data for meetings timeline chart. + + Args: + attendance_type: Optional filter for attendance type (e.g., 'onsite'). + + Returns: + Tuple of (sorted_meetings, datasets) for Chart.js. 
+ """ + cache_key = f'stats:get_affiliation_data_for_meetings:{attendance_type}' + sorted_meetings, datasets = cache.get(cache_key, (None, None)) + if (sorted_meetings, datasets) == (None, None): + top_n = 20 # could be a parameter, but would need to adjust cache handling + + # Get registration status details + if attendance_type: + registrations = Registration.objects.filter(tickets__attendance_type=attendance_type) + else: + registrations = Registration.objects.all() + registrations = registrations.values('affiliation', 'meeting__number') + + # Count per canonicalized affiliation + organization = dict() + meetings_set = set() + org_totals = defaultdict(int) + data_map = defaultdict(dict) # {org: {meeting: count}} + + for reg in registrations: + meeting = reg['meeting__number'] + meetings_set.add(meeting) + affiliation = canonicalize_affiliation(reg['affiliation']) or "Unspecified" + organization[affiliation] = organization.get(affiliation, 0) + 1 + org_totals[affiliation] = org_totals.get(affiliation, 0) + 1 + data_map[affiliation][meeting] = data_map[affiliation].get(meeting, 0) + 1 + + # ── Step 2: Sort meetings numerically rather than alphabetically ── + sorted_meetings = sorted(meetings_set, key=lambda x: int(x) if x.isdigit() else x) + + # ── Step 3: Get top N countries ── + top_orgs = sorted( + org_totals.keys(), + key=lambda c: org_totals[c], + reverse=True + )[:top_n] + non_top_orgs = org_totals.keys() - top_orgs + other_totals = defaultdict(int) + for m in sorted_meetings: + other_totals[m] = 0 + for c in non_top_orgs: + other_totals[m] += int(data_map[c].get(m, 0)) + + # ── Step 4: Build Chart.js datasets ── + + datasets = [] + for idx, org in enumerate(top_orgs): + color = colors[idx % len(colors)] + datasets.append({ + 'label': org, + 'data': [data_map[org].get(m, 0) for m in sorted_meetings], + 'borderColor': color, + 'fill': False, + 'tension': 0.3, + 'pointColor': color, + 'pointBackgroundColor': color, + 'pointRadius': 4, + 'pointHoverRadius': 6, + 'borderWidth': 2, + }) + + # -- Step 4.bis handle the other -- + datasets.append({ + 'label': 'Other', + 'data': [other_totals.get(m, 0) for m in sorted_meetings], + 'borderColor': 'black', + 'fill': False, + 'tension': 0.3, + 'pointColor': 'black', + 'pointBackgroundColor': 'black', + 'pointRadius': 4, + 'pointHoverRadius': 6, + 'borderWidth': 2, + }) + cache.set( + cache_key, + (sorted_meetings, datasets), + settings.STATS_TIMELINE_CACHE_TIMEOUT, + ) -def add_labeled_top_series_from_bins(chart_data, bins, limit): - """Take bins on the form (x, label): [name1, name2, ...], figure out - how many there are per label, take the overall top ones and put - them into sorted series like [(x1, len(names1)), (x2, len(names2)), ...].""" - aggregated_bins = defaultdict(set) - xs = set() - for (x, label), names in bins.items(): - xs.add(x) - aggregated_bins[label].update(names) + return sorted_meetings, datasets - xs = list(sorted(xs)) +def get_country_data_for_meetings(attendance_type=None): + """Get country participation data for meetings timeline chart. - sorted_bins = sorted(aggregated_bins.items(), key=lambda t: len(t[1]), reverse=True) - top = [ label for label, names in list(sorted_bins)[:limit]] + Args: + attendance_type: Optional filter for attendance type (e.g., 'onsite'). - for label in top: - series_data = [] + Returns: + Tuple of (sorted_meetings, datasets) for Chart.js. 
+ """ + cache_key = f'stats:get_country_data_for_meetings:{attendance_type}' + sorted_meetings, datasets = cache.get(cache_key, (None, None)) + if (sorted_meetings, datasets) == (None, None): + top_n = 10 # could be a parameter, but would need to adjust cache handling + # Get registration status counts, aggregated by country_code + if attendance_type: + registrations = Registration.objects.filter(tickets__attendance_type=attendance_type) + else: + registrations = Registration.objects.all() + queryset = ( + registrations + .values( + 'meeting__number', # e.g. "118", "119", "120" + 'country_code' # country code of the participant + ) + .annotate(participant_count=Count('id')) + .order_by('meeting__number') # chronological order + ) + + # ── Step 1: Collect all meetings and country totals ── + meetings_set = set() + country_totals = defaultdict(int) + data_map = defaultdict(dict) # {country: {meeting: count}} + + for row in queryset: + meeting = row['meeting__number'] + country = row['country_code'] + count = row['participant_count'] + + meetings_set.add(meeting) + country_totals[country] += count + data_map[country][meeting] = count + + # ── Step 2: Sort meetings numerically rather than alphabetically ── + sorted_meetings = sorted(meetings_set, key=lambda x: int(x) if x.isdigit() else x) + + # ── Step 3: Get top N countries ── + top_countries = sorted( + country_totals.keys(), + key=lambda c: country_totals[c], + reverse=True + )[:top_n] + + # -- Step 3.bis do the 'other' category -- + non_top_countries = country_totals.keys() - top_countries + other_totals = defaultdict(int) + for m in sorted_meetings: + other_totals[m] = 0 + for c in non_top_countries: + other_totals[m] += int(data_map[c].get(m, 0)) + + # ── Step 4: Build Chart.js datasets ── + + datasets = [] + for idx, country in enumerate(top_countries): + color = colors[idx % len(colors)] + datasets.append({ + 'label': country, + 'data': [data_map[country].get(m, 0) for m in sorted_meetings], + 'borderColor': color, + 'fill': False, + 'tension': 0.3, + 'pointColor': color, + 'pointBackgroundColor': color, + 'pointRadius': 4, + 'pointHoverRadius': 6, + 'borderWidth': 2, + }) + + # -- Step 4.bis handle the other -- + datasets.append({ + 'label': 'Other', + 'data': [other_totals.get(m, 0) for m in sorted_meetings], + 'borderColor': 'black', + 'fill': False, + 'tension': 0.3, + 'pointColor': 'black', + 'pointBackgroundColor': 'black', + 'pointRadius': 4, + 'pointHoverRadius': 6, + 'borderWidth': 2, + }) + cache.set( + cache_key, + (sorted_meetings, datasets), + settings.STATS_TIMELINE_CACHE_TIMEOUT, + ) - for x in xs: - names = bins.get((x, label), set()) + return sorted_meetings, datasets + +def get_data_for_meetings(): + """Get total participation data by attendance type for meetings timeline chart. + + Returns: + Tuple of (sorted_meetings, datasets) for Chart.js. + """ + cache_key = "stats:get_data_for_meetings" + sorted_meetings, datasets = cache.get(cache_key, (None, None)) + if (sorted_meetings, datasets) == (None, None): + # Get registration status counts, aggregated by ticket types + registrations = Registration.objects.filter(tickets__attendance_type__in=['onsite', 'remote']) + queryset = ( + registrations + .values( + 'meeting__number', # e.g. 
"118", "119", "120" + 'tickets__attendance_type' + ) + .annotate(participant_count=Count('id')) + .order_by('meeting__number') # chronological order + ) + + # ── Step 1: Collect all meetings and tickets totals ── + meetings_set = set() + tickets_totals = defaultdict(int) + data_map = defaultdict(dict) # {ticket: {meeting: count}} + + for row in queryset: + meeting = row['meeting__number'] + ticket = row['tickets__attendance_type'] + count = row['participant_count'] + + meetings_set.add(meeting) + tickets_totals[ticket] += count + data_map[ticket][meeting] = count + + # ── Step 2: Sort meetings numerically rather than alphabetically ── + sorted_meetings = sorted(meetings_set, key=lambda x: int(x) if x.isdigit() else x) + ticket_types = tickets_totals.keys() + + # ── Step 4: Build Chart.js datasets ── + # Color palette for lines + colors = [ '#FF6384', '#36A2EB'] + + datasets = [] + for idx, ticket_type in enumerate(ticket_types): + color = colors[idx % len(colors)] + datasets.append({ + 'label': ticket_type, + 'data': [data_map[ticket_type].get(m, 0) for m in sorted_meetings], + 'borderColor': color, + 'backgroundColor': color + '99', # 60% opacity fill + 'fill': True, + 'tension': 0.0, + 'pointColor': color, + 'pointBackgroundColor': color, + 'pointRadius': 4, + 'pointHoverRadius': 6, + 'borderWidth': 2, + }) + cache.set( + cache_key, + (sorted_meetings, datasets), + settings.STATS_TIMELINE_CACHE_TIMEOUT, + ) + return sorted_meetings, datasets + +def meetings_timeline(request, stats_type='country'): + """Render the meetings timeline page with participation statistics over time. + + Args: + request: The HTTP request object. + stats_type: Type of statistics ('country' or 'total'). + top_n: Number of top items to show (for country stats). + + Returns: + Rendered response for the meetings timeline template. 
+ """ + if stats_type == 'total': + total_labels, total_data_sets = get_data_for_meetings() + in_person_labels = ([], []) + in_person_data_sets = ([], []) + top_n = len(total_data_sets) - 1 # subtract one because we don't count "other" + elif stats_type == 'affiliation': + total_labels, total_data_sets = get_affiliation_data_for_meetings() + in_person_labels, in_person_data_sets = get_affiliation_data_for_meetings(attendance_type='onsite') + top_n = len(total_data_sets) - 1 # subtract one because we don't count "other" + elif stats_type == 'country': + total_labels, total_data_sets = get_country_data_for_meetings() + in_person_labels, in_person_data_sets = get_country_data_for_meetings(attendance_type='onsite') + top_n = len(total_data_sets) - 1 # subtract one because we don't count "other" + else: + return HttpResponseRedirect(urlreverse("ietf.stats.views.stats_index")) - series_data.append((x, len(names))) + total_chart_data = { + 'labels': total_labels, + 'datasets': total_data_sets, + } - chart_data.append({ - "data": series_data, - "name": label - }) + # On per country/affiliation have a separate graph for inperson + if stats_type == 'total': + in_person_chart_data = None + else: + in_person_chart_data = { + 'labels': in_person_labels, + 'datasets': in_person_data_sets, + } -def document_stats(request, stats_type=None): - return HttpResponseRedirect(urlreverse("ietf.stats.views.stats_index")) + # Prepare the list of choice buttons for the template + possible_stats_types = [ + ("affiliation", "Per affiliation", urlreverse(meetings_timeline, kwargs={'stats_type': 'affiliation'})), + ("country", "Per country", urlreverse(meetings_timeline, kwargs={'stats_type': 'country'})), + ("total", "Total", urlreverse(meetings_timeline, kwargs={'stats_type': 'total'})), + ] + current_meeting = get_current_ietf_meeting_num() + if stats_type == 'total': + possible_stats_type = 'country' + else: + possible_stats_type = stats_type -def known_countries_list(request, stats_type=None, acronym=None): - countries = CountryName.objects.prefetch_related("countryalias_set") - for c in countries: - # the sorting is a bit of a hack - it puts the ISO code first - # since it was added in a migration - c.aliases = sorted(c.countryalias_set.all(), key=lambda a: a.pk) + possible_meeting_numbers = [(int(current_meeting)-1, urlreverse(meeting_stats, kwargs={'meeting_number': int(current_meeting)-1, 'stats_type': possible_stats_type})), + (int(current_meeting), urlreverse(meeting_stats, kwargs={'meeting_number': int(current_meeting), 'stats_type': possible_stats_type})), + (int(current_meeting)+1, urlreverse(meeting_stats, kwargs={'meeting_number': int(current_meeting)+1, 'stats_type': possible_stats_type}))] - return render(request, "stats/known_countries_list.html", { - "countries": countries, + return render(request, "stats/meetings_timeline.html", { + "top_n": top_n, + "possible_stats_types": possible_stats_types, + "possible_meeting_numbers": possible_meeting_numbers, + "stats_type": stats_type, + "total_chart_data": total_chart_data, + "in_person_chart_data": in_person_chart_data, }) -def meeting_stats(request, num=None, stats_type=None): - return HttpResponseRedirect(urlreverse("ietf.stats.views.stats_index")) +def get_affiliation_data_for_meeting(meeting_number, minimum_required, attendance_type=None): + """Get affiliation participation data for a specific meeting. + + Args: + meeting_number: The meeting number. + minimum_required: Minimum count to include in main data (others go to 'Other'). 
+ attendance_type: Optional filter for attendance type. + + Returns: + Tuple of (labels, data, total) for chart display. + """ + # Get registration status details + registrations = Registration.objects.filter(meeting__number=meeting_number) + if attendance_type: + registrations = registrations.filter(tickets__attendance_type=attendance_type) + registrations = registrations.values('affiliation') + + # Count per canonicalized affiliation + organization = dict() + for reg in registrations: + affiliation = canonicalize_affiliation(reg['affiliation']) or "Unspecified" + organization[affiliation] = organization.get(affiliation, 0) + 1 + + # Sort to have the largest count first (nicer in pie chart) + sorted_orgs = sorted(organization.items(), key=lambda t: t[1], reverse=True) + labels = [] + data = [] + others_count = 0 + total = 0 + for org, count in sorted_orgs: + total += count + if count > minimum_required: + labels.append(org) + data.append(count) + else: + others_count += count + + if others_count > 0: + labels.append('Other') + data.append(others_count) + + return labels, data, total + +def get_data_for_meeting(meeting_number, minimum_required, attendance_type=None): + """Get country participation data for a specific meeting. + + Args: + meeting_number: The meeting number. + minimum_required: Minimum count to include in main data (others go to 'Other'). + attendance_type: Optional filter for attendance type. + + Returns: + Tuple of (labels, data, total) for chart display. + """ + # Get registration status counts, aggregated by country_code + registration_counts = Registration.objects.filter(meeting__number=meeting_number) + if attendance_type: + registration_counts = registration_counts.filter(tickets__attendance_type=attendance_type) + registration_counts = registration_counts.values('country_code').annotate(count=Count('country_code')).order_by('-count') + + labels = [] + data = [] + others_count = 0 + total = 0 + for item in registration_counts: + total += item['count'] + if item['count'] > minimum_required: + labels.append(item['country_code']) + data.append(item['count']) + else: + others_count += item['count'] + + if others_count > 0: + labels.append('Other') + data.append(others_count) + + return labels, data, total + +def meeting_stats(request, meeting_number=None, stats_type='country'): + """Render statistics for a specific meeting. + + Args: + request: The HTTP request object. + meeting_number: The meeting number (defaults to current). + stats_type: Type of statistics ('country' or 'affiliation'). + + Returns: + Rendered response for the meeting stats template. 
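+ + Example (illustrative URL; the meeting number is hypothetical): + /stats/meeting/124/affiliation/ renders the total and in-person + registration charts for IETF 124, grouped by affiliation.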
+ """ + + current_meeting = get_current_ietf_meeting_num() + if meeting_number is None: + meeting_number = current_meeting + + this_meeting = get_ietf_meeting(meeting_number) + + if stats_type == 'affiliation': + minimum_required = 4 + total_labels, total_data, total_total = get_affiliation_data_for_meeting(meeting_number, minimum_required) + in_person_labels, in_person_data, in_person_total = get_affiliation_data_for_meeting(meeting_number, minimum_required, attendance_type='onsite') + elif stats_type == 'country': + minimum_required = 10 + total_labels, total_data, total_total = get_data_for_meeting(meeting_number, minimum_required) + in_person_labels, in_person_data, in_person_total = get_data_for_meeting(meeting_number, minimum_required, attendance_type='onsite') + else: + return HttpResponseRedirect(urlreverse("ietf.stats.views.stats_index")) + + total_chart_data = { + 'labels': total_labels, + 'datasets': [{ + 'label': 'Total Registrations by ' + stats_type, + 'data': total_data, + 'borderColor': '#ffffff', + 'borderWidth': 2, + }] + } + in_person_chart_data = { + 'labels': in_person_labels, + 'datasets': [{ + 'label': 'In Person Registrations by ' + stats_type, + 'data': in_person_data, + 'borderColor': '#ffffff', + 'borderWidth': 2, + }] + } + + # Prepare the list of choice buttons for the template + possible_stats_types = [ + ("affiliation", "Per affiliation", urlreverse(meeting_stats, kwargs={'meeting_number': meeting_number, 'stats_type': 'affiliation'})), + ("country", "Per country", urlreverse(meeting_stats, kwargs={'meeting_number': meeting_number, 'stats_type': 'country'})), + ] + + # Prepare the list of meeting number buttons for the template + possible_meeting_numbers = [('All', urlreverse(meetings_timeline, kwargs={'stats_type': stats_type}))] + if int(meeting_number) > 72: # No registration data before IETF-72 + possible_meeting_numbers.append((int(meeting_number)-1, urlreverse(meeting_stats, kwargs={'meeting_number': int(meeting_number)-1, 'stats_type': stats_type}))) + possible_meeting_numbers.append((meeting_number, urlreverse(meeting_stats, kwargs={'meeting_number': meeting_number, 'stats_type': stats_type}))) + if int(meeting_number) <= int(current_meeting): # Allow current meeting +1 + possible_meeting_numbers.append((int(meeting_number)+1, urlreverse(meeting_stats, kwargs={'meeting_number': int(meeting_number)+1, 'stats_type': stats_type}))) + + return render(request, "stats/meeting_stats.html", { + "meeting_number": meeting_number, + "meeting_date": this_meeting.date, + "meeting_country": this_meeting.country, + "meeting_city": this_meeting.city, + "possible_stats_types": possible_stats_types, + "possible_meeting_numbers": possible_meeting_numbers, + "stats_type": stats_type, + "minimum_required": minimum_required, + "total_chart_data": total_chart_data, + "total_total": total_total, + "in_person_chart_data": in_person_chart_data, + "in_person_total": in_person_total + }) @login_required def review_stats(request, stats_type=None, acronym=None): + """Render review statistics page with tables and charts for review assignments. + + Shows completion status, results, assignment states, and time series data. + Supports both team-level and reviewer-level views with filtering options. + + Args: + request: The HTTP request object. + stats_type: Type of statistics ('completion', 'results', 'states', 'time'). + acronym: Team acronym for reviewer-level view (None for team view). + + Returns: + Rendered response for the review stats template. 
+ """ # This view is a bit complex because we want to show a bunch of # tables with various filtering options, and both a team overview # and a reviewers-within-team overview - and a time series chart. diff --git a/ietf/sync/errata.py b/ietf/sync/errata.py new file mode 100644 index 0000000000..113d987291 --- /dev/null +++ b/ietf/sync/errata.py @@ -0,0 +1,184 @@ +# Copyright The IETF Trust 2026, All Rights Reserved +import datetime +import json +from collections import defaultdict +from typing import DefaultDict + +from django.conf import settings +from django.core.files.storage import storages +from django.db import transaction +from django.db.models import Q + +from ietf.doc.models import Document, DocEvent +from ietf.name.models import DocTagName +from ietf.person.models import Person +from ietf.utils.log import log +from ietf.utils.models import DirtyBits + + +DEFAULT_ERRATA_JSON_BLOB_NAME = "other/errata.json" + +type ErrataJsonEntry = dict[str, str] + +def get_errata_last_updated() -> datetime.datetime: + """Get timestamp of the last errata.json update + + May raise FileNotFoundError or other storage/S3 exceptions. Be prepared. + """ + red_bucket = storages["red_bucket"] + return red_bucket.get_modified_time( + getattr(settings, "ERRATA_JSON_BLOB_NAME", DEFAULT_ERRATA_JSON_BLOB_NAME) + ) + + +def get_errata_data() -> list[ErrataJsonEntry]: + red_bucket = storages["red_bucket"] + with red_bucket.open( + getattr(settings, "ERRATA_JSON_BLOB_NAME", DEFAULT_ERRATA_JSON_BLOB_NAME), "r" + ) as f: + errata_data = json.load(f) + return errata_data + + +def errata_map_from_json(errata_data: list[ErrataJsonEntry]): + """Create a dict mapping RFC number to a list of applicable errata records""" + errata = defaultdict(list) + for item in errata_data: + doc_id = item["doc-id"] + if doc_id.upper().startswith("RFC"): + rfc_number = int(doc_id[3:]) + errata[rfc_number].append(item) + return dict(errata) + + +def update_errata_tags(errata_data: list[ErrataJsonEntry]): + tag_has_errata = DocTagName.objects.get(slug="errata") + tag_has_verified_errata = DocTagName.objects.get(slug="verified-errata") + system = Person.objects.get(name="(System)") + + errata_map = errata_map_from_json(errata_data) + nums_with_errata = [ + num + for num, errata in errata_map.items() + if any(er["errata_status_code"] != "Rejected" for er in errata) + ] + nums_with_verified_errata = [ + num + for num, errata in errata_map.items() + if any(er["errata_status_code"] == "Verified" for er in errata) + ] + + rfcs_gaining_errata_tag = Document.objects.filter( + type_id="rfc", rfc_number__in=nums_with_errata + ).exclude(tags=tag_has_errata) + + rfcs_gaining_verified_errata_tag = Document.objects.filter( + type_id="rfc", rfc_number__in=nums_with_verified_errata + ).exclude(tags=tag_has_verified_errata) + + rfcs_losing_errata_tag = Document.objects.filter( + type_id="rfc", tags=tag_has_errata + ).exclude(rfc_number__in=nums_with_errata) + + rfcs_losing_verified_errata_tag = Document.objects.filter( + type_id="rfc", tags=tag_has_verified_errata + ).exclude(rfc_number__in=nums_with_verified_errata) + + # map rfc_number to add/remove lists + changes: DefaultDict[Document, dict[str, list[DocTagName]]] = defaultdict( + lambda: {"add": [], "remove": []} + ) + for rfc in rfcs_gaining_errata_tag: + changes[rfc]["add"].append(tag_has_errata) + for rfc in rfcs_gaining_verified_errata_tag: + changes[rfc]["add"].append(tag_has_verified_errata) + for rfc in rfcs_losing_errata_tag: + changes[rfc]["remove"].append(tag_has_errata) + for rfc in 
rfcs_losing_verified_errata_tag: + changes[rfc]["remove"].append(tag_has_verified_errata) + + for rfc, changeset in changes.items(): + # Update in a transaction per RFC to keep tags and DocEvents consistent. + # With this in place, an interrupted task will be cleanly completed on the + # next run. + with transaction.atomic(): + change_descs = [] + for tag in changeset["add"]: + rfc.tags.add(tag) + change_descs.append(f"added {tag.slug} tag") + for tag in changeset["remove"]: + rfc.tags.remove(tag) + change_descs.append(f"removed {tag.slug} tag") + summary = "Update from RFC Editor: " + ", ".join(change_descs) + if rfc.rfc_number in errata_map and all( + er["errata_status_code"] == "Rejected" + for er in errata_map[rfc.rfc_number] + ): + summary += " (all errata rejected)" + DocEvent.objects.create( + doc=rfc, + rev=rfc.rev, # RFCs are expected to have an empty rev + by=system, + type="sync_from_rfc_editor", + desc=summary, + ) + + +def update_errata_from_rfceditor(): + errata_data = get_errata_data() + update_errata_tags(errata_data) + + +## DirtyBits management for the errata tags + +ERRATA_SLUG = DirtyBits.Slugs.ERRATA + + +def update_errata_dirty_time() -> DirtyBits | None: + try: + last_update = get_errata_last_updated() + except Exception as err: + log(f"Error in get_errata_last_updated: {err}") + return None + else: + dirty_work, created = DirtyBits.objects.update_or_create( + slug=ERRATA_SLUG, defaults={"dirty_time": last_update} + ) + if created: + log(f"Created DirtyBits(slug='{ERRATA_SLUG}')") + return dirty_work + + +def mark_errata_as_processed(when: datetime.datetime): + n_updated = DirtyBits.objects.filter( + Q(processed_time__isnull=True) | Q(processed_time__lt=when), + slug=ERRATA_SLUG, + ).update(processed_time=when) + if n_updated > 0: + log(f"processed_time is now {when.isoformat()}") + else: + log("processed_time not updated, no matching record found") + + +def errata_are_dirty(): + """Do the errata tags need to be updated?""" + dirty_work = update_errata_dirty_time() # creates DirtyBits if needed + if dirty_work is None: + # A None indicates we could not check the timestamp of errata.json. In that + # case, we are not likely to be able to read the blob either, so don't try + # to process it. An error was already logged.
+ return False + display_processed_time = ( + dirty_work.processed_time.isoformat() + if dirty_work.processed_time is not None + else "never" + ) + log( + f"DirtyBits(slug='{ERRATA_SLUG}'): " + f"dirty_time={dirty_work.dirty_time.isoformat()} " + f"processed_time={display_processed_time}" + ) + return ( + dirty_work.processed_time is None + or dirty_work.dirty_time >= dirty_work.processed_time + ) diff --git a/ietf/sync/rfcindex.py b/ietf/sync/rfcindex.py index 63c2044931..d1a0ed432f 100644 --- a/ietf/sync/rfcindex.py +++ b/ietf/sync/rfcindex.py @@ -1,4 +1,5 @@ # Copyright The IETF Trust 2026, All Rights Reserved +import datetime import json from collections import defaultdict from collections.abc import Container @@ -11,6 +12,7 @@ from django.conf import settings from django.core.files.base import ContentFile +from django.db.models import Q from lxml import etree from django.core.files.storage import storages @@ -22,8 +24,11 @@ from ietf.doc.models import Document from ietf.name.models import StdLevelName from ietf.utils.log import log +from ietf.utils.models import DirtyBits FORMATS_FOR_INDEX = ["txt", "html", "pdf", "xml", "ps"] +SS_TXT_MARGIN = 3 +SS_TXT_CUE_COL_WIDTH = 14 def format_rfc_number(n): @@ -148,7 +153,7 @@ def get_publication_std_levels() -> dict[int, StdLevelName]: def format_ordering(rfc_number): - if rfc_number < 8650: + if rfc_number < settings.FIRST_V3_RFC: ordering = ["txt", "ps", "pdf", "html", "xml"] else: ordering = ["html", "txt", "ps", "pdf", "xml"] @@ -267,6 +272,217 @@ def get_rfc_text_index_entries(): return entries +def subseries_text_line(line, first=False): + """Return subseries text entry line""" + indent = " " * SS_TXT_CUE_COL_WIDTH + if first: + initial_indent = " " * SS_TXT_MARGIN + else: + initial_indent = indent + return fill( + line, + initial_indent=initial_indent, + subsequent_indent=indent, + width=80, + break_on_hyphens=False, + ) + + +def get_bcp_text_index_entries(): + """Returns BCP entries for bcp-index.txt""" + entries = [] + + highest_bcp_number = ( + Document.objects.filter(type_id="bcp") + .annotate( + number=Cast( + Substr("name", 4, None), + output_field=models.IntegerField(), + ) + ) + .order_by("-number") + .first() + .number + ) + + for bcp_number in range(1, highest_bcp_number + 1): + bcp_name = f"BCP{bcp_number}" + bcp = Document.objects.filter(type_id="bcp", name=f"{bcp_name.lower()}").first() + + if bcp: + entry = subseries_text_line( + ( + f"[{bcp_name}]" + f"{' ' * (SS_TXT_CUE_COL_WIDTH - len(bcp_name) - 2 - SS_TXT_MARGIN)}" + f"Best Current Practice {bcp_number}," + ), + first=True, + ) + entry += "\n" + entry += subseries_text_line( + f"<{settings.RFC_EDITOR_INFO_BASE_URL}{bcp_name.lower()}>." + ) + entry += "\n" + entry += subseries_text_line( + "At the time of writing, this BCP comprises the following:" + ) + entry += "\n\n" + rfcs = sorted(bcp.contains(), key=lambda x: x.rfc_number) + for rfc in rfcs: + authors = ", ".join( + author.format_for_titlepage() for author in rfc.rfcauthor_set.all() + ) + entry += subseries_text_line( + ( + f'{authors}, "{rfc.title}", BCP¶{bcp_number}, RFC¶{rfc.rfc_number}, ' + f"DOI¶{rfc.doi}, {rfc.pub_date().strftime('%B %Y')}, " + f"<{settings.RFC_EDITOR_INFO_BASE_URL}rfc{rfc.rfc_number}>." 
+ ) + ).replace("¶", " ") + entry += "\n\n" + else: + entry = subseries_text_line( + ( + f"[{bcp_name}]" + f"{' ' * (SS_TXT_CUE_COL_WIDTH - len(bcp_name) - 2 - SS_TXT_MARGIN)}" + f"Best Current Practice {bcp_number} currently contains no RFCs" + ), + first=True, + ) + entries.append(entry) + return entries + + +def get_std_text_index_entries(): + """Returns STD entries for std-index.txt""" + entries = [] + + highest_std_number = ( + Document.objects.filter(type_id="std") + .annotate( + number=Cast( + Substr("name", 4, None), + output_field=models.IntegerField(), + ) + ) + .order_by("-number") + .first() + .number + ) + + for std_number in range(1, highest_std_number + 1): + std_name = f"STD{std_number}" + std = Document.objects.filter(type_id="std", name=f"{std_name.lower()}").first() + + if std and std.contains(): + entry = subseries_text_line( + ( + f"[{std_name}]" + f"{' ' * (SS_TXT_CUE_COL_WIDTH - len(std_name) - 2 - SS_TXT_MARGIN)}" + f"Internet Standard {std_number}," + ), + first=True, + ) + entry += "\n" + entry += subseries_text_line( + f"<{settings.RFC_EDITOR_INFO_BASE_URL}{std_name.lower()}>." + ) + entry += "\n" + entry += subseries_text_line( + "At the time of writing, this STD comprises the following:" + ) + entry += "\n\n" + rfcs = sorted(std.contains(), key=lambda x: x.rfc_number) + for rfc in rfcs: + authors = ", ".join( + author.format_for_titlepage() for author in rfc.rfcauthor_set.all() + ) + entry += subseries_text_line( + ( + f'{authors}, "{rfc.title}", STD¶{std_number}, RFC¶{rfc.rfc_number}, ' + f"DOI¶{rfc.doi}, {rfc.pub_date().strftime('%B %Y')}, " + f"<{settings.RFC_EDITOR_INFO_BASE_URL}rfc{rfc.rfc_number}>." + ) + ).replace("¶", " ") + entry += "\n\n" + else: + entry = subseries_text_line( + ( + f"[{std_name}]" + f"{' ' * (SS_TXT_CUE_COL_WIDTH - len(std_name) - 2 - SS_TXT_MARGIN)}" + f"Internet Standard {std_number} currently contains no RFCs" + ), + first=True, + ) + entries.append(entry) + return entries + + +def get_fyi_text_index_entries(): + """Returns FYI entries for fyi-index.txt""" + entries = [] + + highest_fyi_number = ( + Document.objects.filter(type_id="fyi") + .annotate( + number=Cast( + Substr("name", 4, None), + output_field=models.IntegerField(), + ) + ) + .order_by("-number") + .first() + .number + ) + + for fyi_number in range(1, highest_fyi_number + 1): + fyi_name = f"FYI{fyi_number}" + fyi = Document.objects.filter(type_id="fyi", name=f"{fyi_name.lower()}").first() + + if fyi and fyi.contains(): + entry = subseries_text_line( + ( + f"[{fyi_name}]" + f"{' ' * (SS_TXT_CUE_COL_WIDTH - len(fyi_name) - 2 - SS_TXT_MARGIN)}" + f"For Your Information {fyi_number}," + ), + first=True, + ) + entry += "\n" + entry += subseries_text_line( + f"<{settings.RFC_EDITOR_INFO_BASE_URL}{fyi_name.lower()}>." + ) + entry += "\n" + entry += subseries_text_line( + "At the time of writing, this FYI comprises the following:" + ) + entry += "\n\n" + rfcs = sorted(fyi.contains(), key=lambda x: x.rfc_number) + for rfc in rfcs: + authors = ", ".join( + author.format_for_titlepage() for author in rfc.rfcauthor_set.all() + ) + entry += subseries_text_line( + ( + f'{authors}, "{rfc.title}", FYI¶{fyi_number}, RFC¶{rfc.rfc_number}, ' + f"DOI¶{rfc.doi}, {rfc.pub_date().strftime('%B %Y')}, " + f"<{settings.RFC_EDITOR_INFO_BASE_URL}rfc{rfc.rfc_number}>." 
+ ) + ).replace("¶", " ") + entry += "\n\n" + else: + entry = subseries_text_line( + ( + f"[{fyi_name}]" + f"{' ' * (SS_TXT_CUE_COL_WIDTH - len(fyi_name) - 2 - SS_TXT_MARGIN)}" + f"For Your Information {fyi_number} currently contains no RFCs" + ), + first=True, + ) + entries.append(entry) + return entries + + def add_subseries_xml_index_entries(rfc_index, ss_type, include_all=False): """Add subseries entries for rfc-index.xml""" # subseries docs annotated with numeric number @@ -481,3 +697,95 @@ def create_rfc_xml_index(): pretty_print=4, ) save_to_red_bucket("rfc-index.xml", pretty_index) + + +def create_bcp_txt_index(): + """Create text index of BCPs""" + DATE_FMT = "%m/%d/%Y" + created_on = timezone.now().strftime(DATE_FMT) + log("Creating bcp-index.txt") + index = render_to_string( + "sync/bcp-index.txt", + { + "created_on": created_on, + "bcps": get_bcp_text_index_entries(), + }, + ) + save_to_red_bucket("bcp-index.txt", index) + + +def create_std_txt_index(): + """Create text index of STDs""" + DATE_FMT = "%m/%d/%Y" + created_on = timezone.now().strftime(DATE_FMT) + log("Creating std-index.txt") + index = render_to_string( + "sync/std-index.txt", + { + "created_on": created_on, + "stds": get_std_text_index_entries(), + }, + ) + save_to_red_bucket("std-index.txt", index) + + +def create_fyi_txt_index(): + """Create text index of FYIs""" + DATE_FMT = "%m/%d/%Y" + created_on = timezone.now().strftime(DATE_FMT) + log("Creating fyi-index.txt") + index = render_to_string( + "sync/fyi-index.txt", + { + "created_on": created_on, + "fyis": get_fyi_text_index_entries(), + }, + ) + save_to_red_bucket("fyi-index.txt", index) + + +## DirtyBits management for the RFC index + +RFCINDEX_SLUG = DirtyBits.Slugs.RFCINDEX + + +def mark_rfcindex_as_dirty(): + _, created = DirtyBits.objects.update_or_create( + slug=RFCINDEX_SLUG, defaults={"dirty_time": timezone.now()} + ) + if created: + log(f"Created DirtyBits(slug='{RFCINDEX_SLUG}')") + + +def mark_rfcindex_as_processed(when: datetime.datetime): + n_updated = DirtyBits.objects.filter( + Q(processed_time__isnull=True) | Q(processed_time__lt=when), + slug=RFCINDEX_SLUG, + ).update(processed_time=when) + if n_updated > 0: + log(f"processed_time is now {when.isoformat()}") + else: + log("processed_time not updated, no matching record found") + + +def rfcindex_is_dirty(): + """Does the rfc index need to be updated?""" + dirty_work, created = DirtyBits.objects.get_or_create( + slug=RFCINDEX_SLUG, defaults={"dirty_time": timezone.now()} + ) + if created: + log(f"Created DirtyBits(slug='{RFCINDEX_SLUG}')") + display_processed_time = ( + dirty_work.processed_time.isoformat() + if dirty_work.processed_time is not None + else "never" + ) + log( + f"DirtyBits(slug='{RFCINDEX_SLUG}'): " + f"dirty_time={dirty_work.dirty_time.isoformat()} " + f"processed_time={display_processed_time}" + ) + return ( + dirty_work.processed_time is None + or dirty_work.dirty_time >= dirty_work.processed_time + ) diff --git a/ietf/sync/tasks.py b/ietf/sync/tasks.py index 4c84dc581e..34b2efeb5c 100644 --- a/ietf/sync/tasks.py +++ b/ietf/sync/tasks.py @@ -17,8 +17,20 @@ from ietf.doc.tasks import rebuild_reference_relations_task from ietf.sync import iana from ietf.sync import rfceditor +from ietf.sync.errata import ( + errata_are_dirty, + mark_errata_as_processed, + update_errata_from_rfceditor, +) from ietf.sync.rfceditor import MIN_QUEUE_RESULTS, parse_queue, update_drafts_from_queue -from ietf.sync.rfcindex import create_rfc_txt_index, create_rfc_xml_index +from ietf.sync.rfcindex 
import ( + create_bcp_txt_index, + create_fyi_txt_index, + create_rfc_txt_index, + create_rfc_xml_index, + create_std_txt_index, + rfcindex_is_dirty, mark_rfcindex_as_processed, mark_rfcindex_as_dirty, +) from ietf.sync.utils import build_from_file_content, load_rfcs_into_blobdb, rsync_helper from ietf.utils import log from ietf.utils.timezone import date_today @@ -27,13 +39,13 @@ @shared_task def rfc_editor_index_update_task(full_index=False): """Update metadata from the RFC index - + Default is to examine only changes in the past 365 days. Call with full_index=True to update the full RFC index. - + According to comments on the original script, a year's worth took about 20s on production as of August 2022 - + The original rfc-editor-index-update script had a long-disabled provision for running the rebuild_reference_relations scripts after the update. That has not been brought over at all because it should be implemented as its own task if it is needed. @@ -51,7 +63,7 @@ def rfc_editor_index_update_task(full_index=False): timeout=30, # seconds ) except requests.Timeout as exc: - log.log(f'GET request timed out retrieving RFC editor index: {exc}') + log.log(f"GET request timed out retrieving RFC editor index: {exc}") return # failed rfc_index_xml = response.text index_data = rfceditor.parse_index(io.StringIO(rfc_index_xml)) @@ -61,9 +73,9 @@ def rfc_editor_index_update_task(full_index=False): timeout=30, # seconds ) except requests.Timeout as exc: - log.log(f'GET request timed out retrieving RFC editor errata: {exc}') + log.log(f"GET request timed out retrieving RFC editor errata: {exc}") return # failed - errata_data = response.json() + errata_data = response.json() if len(index_data) < rfceditor.MIN_INDEX_RESULTS: log.log("Not enough index entries, only %s" % len(index_data)) return # failed @@ -96,15 +108,15 @@ def rfc_editor_queue_updates_task(): drafts, warnings = parse_queue(io.StringIO(response.text)) for w in warnings: log.log(f"Warning: {w}") - + if len(drafts) < MIN_QUEUE_RESULTS: log.log("Not enough results, only %s" % len(drafts)) return # failed - + changed, warnings = update_drafts_from_queue(drafts) for w in warnings: log.log(f"Warning: {w}") - + for c in changed: log.log(f"Updated {c}") @@ -120,9 +132,11 @@ def iana_changes_update_task(): MAX_INTERVAL_ACCEPTED_BY_IANA = datetime.timedelta(hours=23) start = ( - timezone.now() - - datetime.timedelta(hours=23) - + datetime.timedelta(seconds=CLOCK_SKEW_COMPENSATION,) + timezone.now() + - datetime.timedelta(hours=23) + + datetime.timedelta( + seconds=CLOCK_SKEW_COMPENSATION, + ) ) end = start + datetime.timedelta(hours=23) @@ -133,7 +147,9 @@ def iana_changes_update_task(): # requests if necessary text = iana.fetch_changes_json( - settings.IANA_SYNC_CHANGES_URL, t, min(end, t + MAX_INTERVAL_ACCEPTED_BY_IANA) + settings.IANA_SYNC_CHANGES_URL, + t, + min(end, t + MAX_INTERVAL_ACCEPTED_BY_IANA), ) log.log(f"Retrieved the JSON: {text}") @@ -159,9 +175,9 @@ def iana_protocols_update_task(): # "this needs to be the date where this tool is first deployed" in the original # iana-protocols-updates script)" rfc_must_published_later_than = datetime.datetime( - 2012, - 11, - 26, + 2012, + 11, + 26, tzinfo=datetime.UTC, ) @@ -171,17 +187,17 @@ def iana_protocols_update_task(): timeout=30, ) except requests.Timeout as exc: - log.log(f'GET request timed out retrieving IANA protocols page: {exc}') + log.log(f"GET request timed out retrieving IANA protocols page: {exc}") return rfc_numbers = iana.parse_protocol_page(response.text) def batched(l, 
n): """Split list l up in batches of max size n. - + For Python 3.12 or later, replace this with itertools.batched() """ - return (l[i:i + n] for i in range(0, len(l), n)) + return (l[i : i + n] for i in range(0, len(l), n)) for batch in batched(rfc_numbers, 100): updated = iana.update_rfc_log_from_protocol_page( @@ -192,6 +208,7 @@ def batched(l, n): for d in updated: log.log("Added history entry for %s" % d.display_name()) + @shared_task def fix_subseries_docevents_task(): """Repairs DocEvents related to bugs around removing docs from subseries @@ -233,6 +250,7 @@ def fix_subseries_docevents_task(): time=obsoleting_time ) + @shared_task def rsync_rfcs_from_rfceditor_task(rfc_numbers: list[int]): log.log(f"Rsyncing rfcs from rfc-editor: {rfc_numbers}") @@ -276,7 +294,51 @@ def load_rfcs_into_blobdb_task(start: int, end: int): @shared_task -def create_rfc_index_task(): - create_rfc_txt_index() - create_rfc_xml_index() +def update_errata_from_rfceditor_task(): + if errata_are_dirty(): + # new_processed_time is the *start* of processing so that any changes after + # this point will trigger another refresh + new_processed_time = timezone.now() + update_errata_from_rfceditor() + mark_errata_as_processed(new_processed_time) + mark_rfcindex_as_dirty() # ensure any changes are reflected in the indexes + +@shared_task +def refresh_rfc_index_task(): + if rfcindex_is_dirty(): + # new_processed_time is the *start* of processing so that any changes after + # this point will trigger another refresh + new_processed_time = timezone.now() + + try: + create_rfc_txt_index() + except Exception as e: + log.log(f"Error: failure in creating rfc-index.txt. {e}") + + try: + create_rfc_xml_index() + except Exception as e: + log.log(f"Error: failure in creating rfc-index.xml. {e}") + + try: + create_bcp_txt_index() + except Exception as e: + log.log(f"Error: failure in creating bcp-index.txt. {e}") + + try: + create_std_txt_index() + except Exception as e: + log.log(f"Error: failure in creating std-index.txt. {e}") + + try: + create_fyi_txt_index() + except Exception as e: + log.log(f"Error: failure in creating fyi-index.txt.
{e}") + pass + + mark_rfcindex_as_processed(new_processed_time) diff --git a/ietf/sync/tests.py b/ietf/sync/tests.py index 21d6cb5cd5..e83b6a5e0a 100644 --- a/ietf/sync/tests.py +++ b/ietf/sync/tests.py @@ -1,5 +1,4 @@ -# Copyright The IETF Trust 2012-2020, All Rights Reserved -# -*- coding: utf-8 -*- +# Copyright The IETF Trust 2012-2026, All Rights Reserved import os @@ -13,6 +12,8 @@ from dataclasses import dataclass from django.conf import settings +from django.core.files.base import ContentFile +from django.core.files.storage import storages from django.urls import reverse as urlreverse from django.utils import timezone from django.test.utils import override_settings @@ -25,15 +26,34 @@ RfcFactory, DocumentAuthorFactory, DocEventFactory, - BcpFactory, + BcpFactory, WgRfcFactory, +) +from ietf.doc.models import ( + Document, + DocEvent, + DeletedEvent, + DocTagName, + RelatedDocument, + State, + StateDocEvent, ) -from ietf.doc.models import Document, DocEvent, DeletedEvent, DocTagName, RelatedDocument, State, StateDocEvent from ietf.doc.utils import add_state_change_event from ietf.group.factories import GroupFactory from ietf.person.factories import PersonFactory from ietf.person.models import Person from ietf.sync import iana, rfceditor, tasks +from ietf.sync.errata import ( + update_errata_from_rfceditor, + get_errata_last_updated, + get_errata_data, + errata_map_from_json, + update_errata_dirty_time, + mark_errata_as_processed, + update_errata_tags, +) +from ietf.sync.tasks import update_errata_from_rfceditor_task from ietf.utils.mail import outbox, empty_outbox +from ietf.utils.models import DirtyBits from ietf.utils.test_utils import login_testing_unauthorized from ietf.utils.test_utils import TestCase from ietf.utils.timezone import date_today, RPC_TZINFO @@ -882,6 +902,191 @@ def test_rfceditor_undo(self): self.assertTrue(StateDocEvent.objects.filter(desc="First", doc=draft)) +class ErrataTests(TestCase): + @override_settings(ERRATA_JSON_BLOB_NAME="myblob.json") + def test_get_errata_last_update(self): + red_bucket = storages["red_bucket"] # InMemoryStorage in test + red_bucket.save("myblob.json", ContentFile("file")) + self.assertEqual( + get_errata_last_updated(), red_bucket.get_modified_time("myblob.json") + ) + + @override_settings(ERRATA_JSON_BLOB_NAME="myblob.json") + def test_get_errata_data(self): + red_bucket = storages["red_bucket"] # InMemoryStorage in test + red_bucket.save("myblob.json", ContentFile('[{"value": 3}]')) + self.assertEqual( + get_errata_data(), + [{"value": 3}], + ) + + def test_errata_map_from_json(self): + input_data = [ + { + "doc-id": "not-an-rfc", + "errata_status_code": "Verified", + }, + { + "doc-id": "rfc01234", + "errata_status_code": "Reported", + }, + { + "doc-id": "RFC1001", + "errata_status_code": "Verified" + }, + { + "doc-id": "RfC1234", + "errata_status_code": "Verified", + }, + ] + expected_output = {1001: [input_data[2]], 1234: [input_data[1], input_data[3]]} + self.assertDictEqual(errata_map_from_json(input_data), expected_output) + + @mock.patch("ietf.sync.errata.update_errata_tags") + @mock.patch("ietf.sync.errata.get_errata_data") + def test_update_errata_from_rfceditor(self, mock_get_data, mock_update): + fake_data = object() + mock_get_data.return_value = fake_data + update_errata_from_rfceditor() + self.assertTrue(mock_get_data.called) + self.assertTrue(mock_update.called) + self.assertEqual(mock_update.call_args, mock.call(fake_data)) + + def test_update_errata_tags(self): + tag_has_errata = 
DocTagName.objects.get(slug="errata") + tag_has_verified_errata = DocTagName.objects.get(slug="verified-errata") + + rfcs = WgRfcFactory.create_batch(10) + rfcs[0].tags.set([tag_has_errata]) + rfcs[1].tags.set([tag_has_errata, tag_has_verified_errata]) + rfcs[2].tags.set([tag_has_errata]) + rfcs[3].tags.set([tag_has_errata, tag_has_verified_errata]) + rfcs[4].tags.set([tag_has_errata]) + rfcs[5].tags.set([tag_has_errata, tag_has_verified_errata]) + + # Only contains the fields we care about, not the full JSON + errata_data = [ + # rfcs[0] had errata and should keep it + {"doc-id": rfcs[0].name, "errata_status_code": "Held for Document Update"}, + {"doc-id": rfcs[0].name, "errata_status_code": "Rejected"}, + # rfcs[1] had errata+verified-errata and should keep both + {"doc-id": rfcs[1].name, "errata_status_code": "Verified"}, + # rfcs[2] had errata and should gain verified-errata + {"doc-id": rfcs[2].name, "errata_status_code": "Verified"}, + # rfcs[3] had errata+verified errata and should lose both + {"doc-id": rfcs[3].name, "errata_status_code": "Rejected"}, + # rfcs[4] had errata and should gain verified-errata + {"doc-id": rfcs[4].name, "errata_status_code": "Verified"}, + {"doc-id": rfcs[4].name, "errata_status_code": "Reported"}, + # rfcs[5] had errata+verified-errata and should lose verified-errata + {"doc-id": rfcs[5].name, "errata_status_code": "Reported"}, + # rfcs[6] had none and should gain errata + {"doc-id": rfcs[6].name, "errata_status_code": "Reported"}, + # rfcs[7] had none and should gain errata+verified-errata + {"doc-id": rfcs[7].name, "errata_status_code": "Verified"}, + # rfcs[8] had none and it should stay that way + {"doc-id": rfcs[8].name, "errata_status_code": "Rejected"}, + # rfcs[9] had none and it should stay that way (no entry at all) + ] + update_errata_tags(errata_data) + + self.assertCountEqual(rfcs[0].tags.all(), [tag_has_errata]) + self.assertIsNone(rfcs[0].docevent_set.first()) # no change + + self.assertCountEqual( + rfcs[1].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertIsNone(rfcs[1].docevent_set.first()) # no change + + self.assertCountEqual( + rfcs[2].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertEqual(rfcs[2].docevent_set.count(), 1) + self.assertIn(": added verified-errata tag", rfcs[2].docevent_set.first().desc) + + self.assertCountEqual(rfcs[3].tags.all(), []) + self.assertEqual(rfcs[3].docevent_set.count(), 1) + self.assertIn( + ": removed errata tag, removed verified-errata tag (all errata rejected)", + rfcs[3].docevent_set.first().desc, + ) + + self.assertCountEqual( + rfcs[4].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertEqual(rfcs[4].docevent_set.count(), 1) + self.assertIn(": added verified-errata tag", rfcs[4].docevent_set.first().desc) + + self.assertCountEqual(rfcs[5].tags.all(), [tag_has_errata]) + self.assertEqual(rfcs[5].docevent_set.count(), 1) + self.assertIn( + ": removed verified-errata tag", rfcs[5].docevent_set.first().desc + ) + + self.assertCountEqual(rfcs[6].tags.all(), [tag_has_errata]) + self.assertEqual(rfcs[6].docevent_set.count(), 1) + self.assertIn(": added errata tag", rfcs[6].docevent_set.first().desc) + + self.assertCountEqual( + rfcs[7].tags.all(), [tag_has_errata, tag_has_verified_errata] + ) + self.assertEqual(rfcs[7].docevent_set.count(), 1) + self.assertIn( + ": added errata tag, added verified-errata tag", + rfcs[7].docevent_set.first().desc, + ) + + self.assertCountEqual(rfcs[8].tags.all(), []) + 
+    @override_settings(ERRATA_JSON_BLOB_NAME="myblob.json")
+    @mock.patch("ietf.sync.errata.get_errata_last_updated")
+    def test_update_errata_dirty_time(self, mock_last_updated):
+        ERRATA_SLUG = DirtyBits.Slugs.ERRATA
+
+        # No time available
+        mock_last_updated.side_effect = FileNotFoundError
+        self.assertIsNone(DirtyBits.objects.filter(slug=ERRATA_SLUG).first())
+        self.assertIsNone(update_errata_dirty_time())  # no blob yet
+        self.assertIsNone(DirtyBits.objects.filter(slug=ERRATA_SLUG).first())
+
+        # Now set a time
+        first_timestamp = timezone.now() - datetime.timedelta(hours=3)
+        mock_last_updated.return_value = first_timestamp
+        mock_last_updated.side_effect = None
+        result = update_errata_dirty_time()
+        self.assertTrue(isinstance(result, DirtyBits))
+        result.refresh_from_db()
+        self.assertEqual(result.slug, ERRATA_SLUG)
+        self.assertEqual(result.processed_time, None)
+        self.assertEqual(result.dirty_time, first_timestamp)
+
+        # Update the time
+        second_timestamp = timezone.now()
+        mock_last_updated.return_value = second_timestamp
+        second_result = update_errata_dirty_time()
+        self.assertEqual(result.pk, second_result.pk)  # should be the same record
+        result.refresh_from_db()
+        self.assertEqual(result.slug, ERRATA_SLUG)
+        self.assertEqual(result.processed_time, None)
+        self.assertEqual(result.dirty_time, second_timestamp)
+
+    def test_mark_errata_as_processed(self):
+        ERRATA_SLUG = DirtyBits.Slugs.ERRATA
+        first_timestamp = timezone.now()
+        mark_errata_as_processed(first_timestamp)  # a missing DirtyBits row is not an error
+        self.assertIsNone(DirtyBits.objects.filter(slug=ERRATA_SLUG).first())
+        dbits = DirtyBits.objects.create(slug=ERRATA_SLUG, dirty_time=first_timestamp)
+        second_timestamp = timezone.now()
+        mark_errata_as_processed(second_timestamp)
+        dbits.refresh_from_db()
+        self.assertEqual(dbits.dirty_time, first_timestamp)
+        self.assertEqual(dbits.processed_time, second_timestamp)
+
+
 class TaskTests(TestCase):
     @override_settings(
         RFC_EDITOR_INDEX_URL="https://rfc-editor.example.com/index/",
@@ -1215,3 +1420,28 @@ def test_load_rfcs_into_blobdb_task(
         self.assertEqual(mock_kwargs, {})
+
+    @mock.patch("ietf.sync.tasks.update_errata_from_rfceditor")
+    @mock.patch("ietf.sync.tasks.mark_rfcindex_as_dirty")
+    @mock.patch("ietf.sync.tasks.mark_errata_as_processed")
+    @mock.patch("ietf.sync.tasks.errata_are_dirty")
+    def test_update_errata_from_rfceditor_task(
+        self,
+        mock_errata_are_dirty,
+        mock_mark_errata_processed,
+        mock_mark_rfcindex_dirty,
+        mock_update,
+    ):
+        mock_errata_are_dirty.return_value = False
+        update_errata_from_rfceditor_task()
+        self.assertTrue(mock_errata_are_dirty.called)
+        self.assertFalse(mock_mark_errata_processed.called)
+        self.assertFalse(mock_mark_rfcindex_dirty.called)
+        self.assertFalse(mock_update.called)
+
+        mock_errata_are_dirty.reset_mock()
+        mock_errata_are_dirty.return_value = True
+        update_errata_from_rfceditor_task()
+        self.assertTrue(mock_errata_are_dirty.called)
+        self.assertTrue(mock_mark_errata_processed.called)
+        self.assertTrue(mock_mark_rfcindex_dirty.called)
+        self.assertTrue(mock_update.called)
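test_update_errata_from_rfceditor_task pins down a guard-and-update control flow without fixing call order or arguments. The sketch below is one flow consistent with it; the celery decorator, the timestamp argument, and the import paths are assumptions, since the test only checks which calls happen:

from celery import shared_task
from django.utils import timezone

from ietf.sync.errata import (  # import paths assumed
    errata_are_dirty,
    mark_errata_as_processed,
    update_errata_from_rfceditor,
)
from ietf.sync.rfcindex import mark_rfcindex_as_dirty  # import path assumed


@shared_task
def update_errata_from_rfceditor_task():
    # Do nothing while the errata blob is unchanged since the last run.
    if not errata_are_dirty():
        return
    processed_time = timezone.now()
    update_errata_from_rfceditor()  # re-derive errata/verified-errata tags
    mark_errata_as_processed(processed_time)
    mark_rfcindex_as_dirty()  # errata tags feed the index, so flag a rebuild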
diff --git a/ietf/sync/tests_rfcindex.py b/ietf/sync/tests_rfcindex.py
index e682c016f5..541ffbb228 100644
--- a/ietf/sync/tests_rfcindex.py
+++ b/ietf/sync/tests_rfcindex.py
@@ -7,16 +7,26 @@
 from django.test.utils import override_settings
 from lxml import etree
 
-from ietf.doc.factories import PublishedRfcDocEventFactory, IndividualRfcFactory
+from ietf.doc.factories import (
+    BcpFactory,
+    FyiFactory,
+    StdFactory,
+    IndividualRfcFactory,
+    PublishedRfcDocEventFactory,
+)
 from ietf.name.models import DocTagName
 from ietf.sync.rfcindex import (
+    create_bcp_txt_index,
+    create_fyi_txt_index,
     create_rfc_txt_index,
     create_rfc_xml_index,
+    create_std_txt_index,
     format_rfc_number,
-    save_to_red_bucket,
-    get_unusable_rfc_numbers,
     get_april1_rfc_numbers,
     get_publication_std_levels,
+    get_unusable_rfc_numbers,
+    save_to_red_bucket,
+    subseries_text_line,
 )
 from ietf.utils.test_utils import TestCase
@@ -69,6 +79,15 @@ def setUp(self):
         ).doc
         self.rfc.tags.add(DocTagName.objects.get(slug="errata"))
 
+        # Create a BCP containing a non-April-Fools RFC
+        self.bcp = BcpFactory(contains=[self.rfc], name="bcp11")
+
+        # Create a STD containing a non-April-Fools RFC
+        self.std = StdFactory(contains=[self.rfc], name="std11")
+
+        # Create a FYI containing a non-April-Fools RFC
+        self.fyi = FyiFactory(contains=[self.rfc], name="fyi11")
+
         # Set up a publication-std-levels.json file to indicate the publication
         # standard of self.rfc as different from its current value
         red_bucket.save(
@@ -137,7 +156,7 @@ def test_create_rfc_xml_index(self, mock_save):
         children = list(index)  # elements as list
 
         # Should be one rfc-not-issued-entry
-        self.assertEqual(len(children), 3)
+        self.assertEqual(len(children), 16)
         self.assertEqual(
             [
                 c.find(f"{ns}doc-id").text
@@ -184,6 +203,159 @@
             [(f"{ns}month", "April"), (f"{ns}year", "2021")],
         )
 
+    @override_settings(RFCINDEX_INPUT_PATH="input/")
+    @mock.patch("ietf.sync.rfcindex.save_to_red_bucket")
+    def test_create_bcp_txt_index(self, mock_save):
+        create_bcp_txt_index()
+        self.assertEqual(mock_save.call_count, 1)
+        self.assertEqual(mock_save.call_args[0][0], "bcp-index.txt")
+        contents = mock_save.call_args[0][1]
+        self.assertTrue(isinstance(contents, str))
+        # starts from 1
+        self.assertIn(
+            "[BCP1]",
+            contents,
+        )
+        # gaps are filled up to 11
+        self.assertIn(
+            "[BCP10]",
+            contents,
+        )
+        # but not to 12
+        self.assertNotIn(
+            "[BCP12]",
+            contents,
+        )
+        # Test empty BCPs
+        self.assertIn(
+            "Best Current Practice 9 currently contains no RFCs",
+            contents,
+        )
+        # No zero prefix!
+        self.assertNotIn(
+            "[BCP0001]",
+            contents,
+        )
+        # Has BCP11 with an RFC
+        self.assertIn(
+            "Best Current Practice 11,",
+            contents,
+        )
+        self.assertIn(
+            f'"{self.rfc.title}"',
+            contents,
+        )
+        self.assertIn(
+            "BCP 11,",
+            contents,
+        )
+        self.assertIn(
+            f"RFC {self.rfc.rfc_number},",
+            contents,
+        )
+
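The assertions in test_create_bcp_txt_index constrain the shape of a bcp-index.txt entry without fixing its full layout. Purely as an illustration of those constraints (unpadded [BCPn] labels, gap numbers reported as empty, populated entries citing title, BCP number, and RFC number), here is one hypothetical rendering of a single entry; the spacing, punctuation, and pub_month_year attribute are assumptions:

def bcp_index_entry(num, rfcs):
    # Hypothetical rendering, consistent only with the assertIn checks above.
    label = f"[BCP{num}]"  # never zero-padded, i.e. no "[BCP0001]"
    if not rfcs:
        return f"{label}  Best Current Practice {num} currently contains no RFCs."
    refs = ", ".join(
        f'"{rfc.title}", BCP {num}, RFC {rfc.rfc_number}, {rfc.pub_month_year}'
        for rfc in rfcs
    )
    return f"{label}  Best Current Practice {num}, {refs}."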
+    @override_settings(RFCINDEX_INPUT_PATH="input/")
+    @mock.patch("ietf.sync.rfcindex.save_to_red_bucket")
+    def test_create_std_txt_index(self, mock_save):
+        create_std_txt_index()
+        self.assertEqual(mock_save.call_count, 1)
+        self.assertEqual(mock_save.call_args[0][0], "std-index.txt")
+        contents = mock_save.call_args[0][1]
+        self.assertTrue(isinstance(contents, str))
+        # starts from 1
+        self.assertIn(
+            "[STD1]",
+            contents,
+        )
+        # gaps are filled up to 11
+        self.assertIn(
+            "[STD10]",
+            contents,
+        )
+        # but not to 12
+        self.assertNotIn(
+            "[STD12]",
+            contents,
+        )
+        # Test empty STDs
+        self.assertIn(
+            "Internet Standard 9 currently contains no RFCs",
+            contents,
+        )
+        # No zero prefix!
+        self.assertNotIn(
+            "[STD0001]",
+            contents,
+        )
+        # Has STD11 with an RFC
+        self.assertIn(
+            "Internet Standard 11,",
+            contents,
+        )
+        self.assertIn(
+            f'"{self.rfc.title}"',
+            contents,
+        )
+        self.assertIn(
+            "STD 11,",
+            contents,
+        )
+        self.assertIn(
+            f"RFC {self.rfc.rfc_number},",
+            contents,
+        )
+
+    @override_settings(RFCINDEX_INPUT_PATH="input/")
+    @mock.patch("ietf.sync.rfcindex.save_to_red_bucket")
+    def test_create_fyi_txt_index(self, mock_save):
+        create_fyi_txt_index()
+        self.assertEqual(mock_save.call_count, 1)
+        self.assertEqual(mock_save.call_args[0][0], "fyi-index.txt")
+        contents = mock_save.call_args[0][1]
+        self.assertTrue(isinstance(contents, str))
+        # starts from 1
+        self.assertIn(
+            "[FYI1]",
+            contents,
+        )
+        # gaps are filled up to 11
+        self.assertIn(
+            "[FYI10]",
+            contents,
+        )
+        # but not to 12
+        self.assertNotIn(
+            "[FYI12]",
+            contents,
+        )
+        # Test empty FYIs
+        self.assertIn(
+            "For Your Information 9 currently contains no RFCs",
+            contents,
+        )
+        # No zero prefix!
+        self.assertNotIn(
+            "[FYI0001]",
+            contents,
+        )
+        # Has FYI11 with an RFC
+        self.assertIn(
+            "For Your Information 11,",
+            contents,
+        )
+        self.assertIn(
+            f'"{self.rfc.title}"',
+            contents,
+        )
+        self.assertIn(
+            "FYI 11,",
+            contents,
+        )
+        self.assertIn(
+            f"RFC {self.rfc.rfc_number},",
+            contents,
+        )
+
 
 class HelperTests(TestCase):
     def test_format_rfc_number(self):
@@ -234,3 +406,8 @@ def test_get_publication_std_levels_raises(self):
         with self.assertRaises(json.JSONDecodeError):
             get_publication_std_levels()
         red_bucket.delete("publication-std-levels.json")
+
+    def test_subseries_text_line(self):
+        text = "foobar"
+        self.assertEqual(subseries_text_line(line=text, first=True), f" {text}")
+        self.assertEqual(subseries_text_line(line=text), f"  {text}")
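Given test_subseries_text_line, the helper is essentially a one-liner. A sketch matching those expectations, assuming the indent widths shown in the test (one leading space for the first line of an entry, two for continuation lines):

def subseries_text_line(line, first=False):
    # Indent a line of a subseries index entry: the first line of an entry
    # gets a single leading space, continuation lines get two.
    return (" " if first else "  ") + line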
  • -
  • - +
  • + Statistics -