From a72af0df5263e378561b448aef6eabb758a9f8a4 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Oct 2025 12:13:39 -0300 Subject: [PATCH 01/13] feat: meeting materials blob resolver API (#9700) * refactor: exclude_deleted() for StoredObject queryset * chore: comment * feat: meeting materials blob resolver API --- ietf/api/urls.py | 2 + ietf/doc/models.py | 9 +++ ietf/doc/storage.py | 4 +- ietf/meeting/views.py | 139 +++++++++++++++++++++++++++++++++++++++++- 4 files changed, 151 insertions(+), 3 deletions(-) diff --git a/ietf/api/urls.py b/ietf/api/urls.py index 6f2efb3c1e..830b251997 100644 --- a/ietf/api/urls.py +++ b/ietf/api/urls.py @@ -49,6 +49,8 @@ url(r'^group/role-holder-addresses/$', api_views.role_holder_addresses), # Let IESG members set positions programmatically url(r'^iesg/position', views_ballot.api_set_position), + # Find the blob to store for a given materials document path + url(r'^meeting/(?:(?P(?:interim-)?[a-z0-9-]+)/)?materials/%(document)s(?P\.[A-Za-z0-9]+)?/resolve/$' % settings.URL_REGEXPS, meeting_views.api_resolve_materials_name), # Let Meetecho set session video URLs url(r'^meeting/session/video/url$', meeting_views.api_set_session_video_url), # Let Meetecho tell us the name of its recordings diff --git a/ietf/doc/models.py b/ietf/doc/models.py index 25ee734cbe..8bb79b64ed 100644 --- a/ietf/doc/models.py +++ b/ietf/doc/models.py @@ -913,6 +913,7 @@ def role_for_doc(self): roles.append('Action Holder') return ', '.join(roles) +# N.B., at least a couple dozen documents exist that do not satisfy this validator validate_docname = RegexValidator( r'^[-a-z0-9]+$', "Provide a valid document name consisting of lowercase letters, numbers and hyphens.", @@ -1588,9 +1589,17 @@ class BofreqResponsibleDocEvent(DocEvent): """ Capture the responsible leadership (IAB and IESG members) for a BOF Request """ responsible = models.ManyToManyField('person.Person', blank=True) + +class StoredObjectQuerySet(models.QuerySet): + def exclude_deleted(self): + return self.filter(deleted__isnull=True) + + class StoredObject(models.Model): """Hold metadata about objects placed in object storage""" + objects = StoredObjectQuerySet.as_manager() + store = models.CharField(max_length=256) name = models.CharField(max_length=1024, null=False, blank=False) # N.B. the 1024 limit on name comes from S3 sha384 = models.CharField(max_length=96) diff --git a/ietf/doc/storage.py b/ietf/doc/storage.py index a234ef2d4f..c6851b14be 100644 --- a/ietf/doc/storage.py +++ b/ietf/doc/storage.py @@ -32,7 +32,7 @@ def __init__(self, file, name, mtime=None, content_type="", store=None, doc_name @classmethod def from_storedobject(cls, file, name, store): """Alternate constructor for objects that already exist in the StoredObject table""" - stored_object = StoredObject.objects.filter(store=store, name=name, deleted__isnull=True).first() + stored_object = StoredObject.objects.exclude_deleted().filter(store=store, name=name).first() if stored_object is None: raise FileNotFoundError(f"StoredObject for {store}:{name} does not exist or was deleted") file = cls(file, name, store, doc_name=stored_object.doc_name, doc_rev=stored_object.doc_rev) @@ -160,7 +160,7 @@ def _delete_stored_object(self, name) -> Optional[StoredObject]: else: now = timezone.now() # Note that existing_record is a queryset that will have one matching object - existing_record.filter(deleted__isnull=True).update(deleted=now) + existing_record.exclude_deleted().update(deleted=now) return existing_record.first() def _save(self, name, content): diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index fcc9312609..e444391785 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -48,11 +48,12 @@ from django.views.decorators.cache import cache_page from django.views.decorators.csrf import ensure_csrf_cookie, csrf_exempt from django.views.generic import RedirectView +from rest_framework.status import HTTP_404_NOT_FOUND, HTTP_400_BAD_REQUEST import debug # pyflakes:ignore from ietf.doc.fields import SearchableDocumentsField -from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent +from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent, StoredObject from ietf.doc.storage_utils import remove_from_storage, retrieve_bytes, store_file from ietf.group.models import Group from ietf.group.utils import can_manage_session_materials, can_manage_some_groups, can_manage_group @@ -120,6 +121,7 @@ from icalendar import Calendar, Event from ietf.doc.templatetags.ietf_filters import absurl +from ..api.ietf_utils import requires_api_token request_summary_exclude_group_types = ['team'] @@ -334,6 +336,141 @@ def materials_document(request, document, num=None, ext=None): return HttpResponseRedirect(redirect_to=doc.get_href(meeting=meeting)) +@requires_api_token +def api_resolve_materials_name(request, document, num=None, ext=None): + """Resolve materials name into document to a blob spec + + Returns the bucket/name of a blob in the blob store that corresponds to the named + document. Handles resolution of revision if it is not specified and determines the + best extension if one is not provided. Response is JSON. + + As of 2025-10-10 we do not have blobs for all materials documents or for every + format of every document. This API still returns the bucket/name as if the blob + exists. Another API will allow the caller to obtain the file contents using that + name if it cannot be retrieved from the blob store. + """ + def _error_response(status: int, detail: str): + return JsonResponse( + { + "status": status, + "title": "Error", + "detail": detail, + }, + status=status, + ) + + def _response(bucket: str, name: str): + return JsonResponse( + { + "bucket": bucket, + "name": name, + } + ) + + try: + meeting = get_meeting(num, type_in=["ietf", "interim"]) + except Http404 as err404: + return _error_response( + HTTP_404_NOT_FOUND, str(err404) + ) + + num = meeting.number + try: + doc, rev = _get_materials_doc(meeting=meeting, name=document) + except Document.DoesNotExist: + return _error_response( + HTTP_404_NOT_FOUND, f"No such document for meeting {num}" + ) + + # Get the Document's base name. It may or may not have an extension. + if rev is None: + basename = Path(doc.get_base_name()) + else: + basename = Path(f"{doc.name}-{rev:02d}") + + # If we have an extension, either from the URL or the Document's base name, look up + # the blob or file or return 404. + if ext or basename.suffix != "": + if ext: + basename = basename.with_suffix(ext) + + # See if we have a stored object under that name + blob = StoredObject.objects.exclude_deleted().filter( + store=doc.type_id, name=basename + ).first() + if blob is not None: + return _response( + bucket=blob.store, + name=blob.name, + ) + # No stored object, fall back to the file system. + filename = Path(doc.get_file_path()) / basename + if filename.exists(): + return _response( + bucket=doc.type_id, + name=str(basename), + ) + else: + return _error_response( + HTTP_404_NOT_FOUND, + f"No file {basename} available for {document} for meeting {num}", + ) + + # No extension has been specified so far, so look one up. + matching_stored_objects = StoredObject.objects.exclude_deleted().filter( + store=doc.type_id, + name__startswith=f"{basename.stem}." # anchor to end with trailing "." + ).order_by("name") # orders by suffix + blob_ext_choices = { + Path(stored_obj.name).suffix: stored_obj + for stored_obj in matching_stored_objects + } + + # Short-circuit to return pdf if present + if ".pdf" in blob_ext_choices: + pdf_blob = blob_ext_choices[".pdf"] + return _response( + bucket=pdf_blob.store, + name=pdf_blob.name, + ) + + # Now look for files + filename = Path(doc.get_file_path()) / basename + file_ext_choices = { + # Construct a map from suffix to full filename + fn.suffix: str(fn) + for fn in sorted(filename.parent.glob(filename.stem + ".*")) + } + + # Short-circuit to return pdf if we have the file + if ".pdf" in file_ext_choices: + pdf_filename = file_ext_choices[".pdf"] + return _response( + bucket=doc.type_id, + name=pdf_filename, + ) + + all_exts = set(blob_ext_choices.keys()).union(file_ext_choices.keys()) + if len(all_exts) > 0: + preferred_ext = sorted(all_exts)[0] + if preferred_ext in blob_ext_choices: + pdf_blob = blob_ext_choices[preferred_ext] + return _response( + bucket=pdf_blob.store, + name=pdf_blob.name, + ) + else: + pdf_filename = file_ext_choices[".pdf"] + return _response( + bucket=doc.type_id, + name=pdf_filename, + ) + + return _error_response( + HTTP_404_NOT_FOUND, f"No suitable file for {document} for meeting {num}" + ) + + @login_required def materials_editable_groups(request, num=None): meeting = get_meeting(num) From d4ff7462d42895e1ea81c77eeafedfc967d7b437 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Oct 2025 20:08:35 -0300 Subject: [PATCH 02/13] feat: materials blob retrieval API (#9728) * feat: materials blob retrieval API (WIP) * refactor: alphabetize ARTIFACT_STORAGE_NAMES * chore: limit buckets served * refactor: any-meeting option in _get_materials_doc() * feat: create missing blobs on retrieval --- ietf/api/urls.py | 1 + ietf/doc/storage_utils.py | 12 ++++- ietf/meeting/views.py | 95 +++++++++++++++++++++++++++++++++++---- ietf/settings.py | 30 ++++++------- 4 files changed, 112 insertions(+), 26 deletions(-) diff --git a/ietf/api/urls.py b/ietf/api/urls.py index 830b251997..b6bf29588e 100644 --- a/ietf/api/urls.py +++ b/ietf/api/urls.py @@ -51,6 +51,7 @@ url(r'^iesg/position', views_ballot.api_set_position), # Find the blob to store for a given materials document path url(r'^meeting/(?:(?P(?:interim-)?[a-z0-9-]+)/)?materials/%(document)s(?P\.[A-Za-z0-9]+)?/resolve/$' % settings.URL_REGEXPS, meeting_views.api_resolve_materials_name), + url(r'^meeting/blob/(?P[a-z0-9-]+)/(?P[a-z][a-z0-9.-]+)$', meeting_views.api_retrieve_materials_blob), # Let Meetecho set session video URLs url(r'^meeting/session/video/url$', meeting_views.api_set_session_video_url), # Let Meetecho tell us the name of its recordings diff --git a/ietf/doc/storage_utils.py b/ietf/doc/storage_utils.py index 510c98c4f5..a25dcb6305 100644 --- a/ietf/doc/storage_utils.py +++ b/ietf/doc/storage_utils.py @@ -12,6 +12,14 @@ from ietf.utils.log import log +class StorageUtilsError(Exception): + pass + + +class BlobExistsError(StorageUtilsError): + pass + + def _get_storage(kind: str) -> Storage: if kind in settings.ARTIFACT_STORAGE_NAMES: return storages[kind] @@ -70,7 +78,7 @@ def store_file( # debug.show('f"Asked to store {name} in {kind}: is_new={is_new}, allow_overwrite={allow_overwrite}"') if not allow_overwrite and not is_new: debug.show('f"Failed to save {kind}:{name} - name already exists in store"') - raise RuntimeError(f"Failed to save {kind}:{name} - name already exists in store") + raise BlobExistsError(f"Failed to save {kind}:{name} - name already exists in store") new_name = _get_storage(kind).save( name, StoredObjectFile( @@ -85,7 +93,7 @@ def store_file( if new_name != name: complaint = f"Error encountered saving '{name}' - results stored in '{new_name}' instead." debug.show("complaint") - raise RuntimeError(complaint) + raise StorageUtilsError(complaint) except Exception as err: log(f"Blobstore Error: Failed to store file {kind}:{name}: {repr(err)}") if settings.SERVER_MODE == "development": diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index e444391785..b6fbc33fcc 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -27,10 +27,12 @@ from django import forms from django.core.cache import caches +from django.core.files.storage import storages from django.shortcuts import render, redirect, get_object_or_404 from django.http import (HttpResponse, HttpResponseRedirect, HttpResponseForbidden, HttpResponseNotFound, Http404, HttpResponseBadRequest, - JsonResponse, HttpResponseGone, HttpResponseNotAllowed) + JsonResponse, HttpResponseGone, HttpResponseNotAllowed, + FileResponse) from django.conf import settings from django.contrib import messages from django.contrib.auth.decorators import login_required @@ -54,7 +56,8 @@ from ietf.doc.fields import SearchableDocumentsField from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent, StoredObject -from ietf.doc.storage_utils import remove_from_storage, retrieve_bytes, store_file +from ietf.doc.storage_utils import remove_from_storage, retrieve_bytes, store_file, \ + exists_in_storage, BlobExistsError from ietf.group.models import Group from ietf.group.utils import can_manage_session_materials, can_manage_some_groups, can_manage_group from ietf.person.models import Person, User @@ -122,6 +125,7 @@ from icalendar import Calendar, Event from ietf.doc.templatetags.ietf_filters import absurl from ..api.ietf_utils import requires_api_token +from ..blobdb.storage import BlobdbStorage, BlobFile request_summary_exclude_group_types = ['team'] @@ -247,21 +251,32 @@ def current_materials(request): raise Http404('No such meeting') -def _get_materials_doc(meeting, name): +def _get_materials_doc(name, meeting=None): """Get meeting materials document named by name - Raises Document.DoesNotExist if a match cannot be found. + Raises Document.DoesNotExist if a match cannot be found. If meeting is None, + matches a name that is associated with _any_ meeting. """ + + def _matches_meeting(doc, meeting=None): + if meeting is None: + return doc.get_related_meeting() is not None + return doc.get_related_meeting() == meeting + # try an exact match first doc = Document.objects.filter(name=name).first() - if doc is not None and doc.get_related_meeting() == meeting: + if doc is not None and _matches_meeting(doc, meeting): return doc, None + # try parsing a rev number if "-" in name: docname, rev = name.rsplit("-", 1) if len(rev) == 2 and rev.isdigit(): doc = Document.objects.get(name=docname) # may raise Document.DoesNotExist - if doc.get_related_meeting() == meeting and rev in doc.revisions_by_newrevisionevent(): + if ( + _matches_meeting(doc, meeting) + and rev in doc.revisions_by_newrevisionevent() + ): return doc, rev # give up raise Document.DoesNotExist @@ -279,7 +294,7 @@ def materials_document(request, document, num=None, ext=None): meeting = get_meeting(num, type_in=["ietf", "interim"]) num = meeting.number try: - doc, rev = _get_materials_doc(meeting=meeting, name=document) + doc, rev = _get_materials_doc(name=document, meeting=meeting) except Document.DoesNotExist: raise Http404("No such document for meeting %s" % num) @@ -376,7 +391,7 @@ def _response(bucket: str, name: str): num = meeting.number try: - doc, rev = _get_materials_doc(meeting=meeting, name=document) + doc, rev = _get_materials_doc(name=document, meeting=meeting) except Document.DoesNotExist: return _error_response( HTTP_404_NOT_FOUND, f"No such document for meeting {num}" @@ -386,7 +401,7 @@ def _response(bucket: str, name: str): if rev is None: basename = Path(doc.get_base_name()) else: - basename = Path(f"{doc.name}-{rev:02d}") + basename = Path(f"{doc.name}-{int(rev):02d}") # If we have an extension, either from the URL or the Document's base name, look up # the blob or file or return 404. @@ -471,6 +486,68 @@ def _response(bucket: str, name: str): ) +@requires_api_token +def api_retrieve_materials_blob(request, bucket, name): + ALLOWED_BUCKETS = { + "agenda", + "chatlog", + "minutes", + "narrativeminutes", + "polls", + "procmaterials", + "slides", + } + DEFAULT_CONTENT_TYPES = { + ".html": "text/html;charset=utf-8", + ".md": "text/markdown;charset=utf-8", + ".pdf": "application/pdf", + ".txt": "text/plain;charset=utf-8", + } + + def _default_content_type(blob_name: str): + return DEFAULT_CONTENT_TYPES.get(Path(name).suffix, "application/octet-stream") + + if not settings.ENABLE_BLOBSTORAGE or bucket not in ALLOWED_BUCKETS: + return HttpResponseNotFound(f"Bucket {bucket} not found.") + storage = storages[bucket] # if not configured, a server error will result + assert isinstance(storage, BlobdbStorage) + try: + blob = storage.open(name, "rb") + assert isinstance(blob, BlobFile) + return FileResponse( + blob, + filename=name, + content_type=blob.content_type or _default_content_type(name), + ) + except FileNotFoundError: + # See if we have a meeting-related document that matches the request + try: + doc, rev = _get_materials_doc(Path(name).stem) + except Document.DoesNotExist: + pass + else: + if doc.type_id == bucket and doc.get_base_name() == name: + filename = Path(doc.get_file_path()) / name + with filename.open("rb") as f: + try: + store_file( + kind=bucket, + name=name, + file=f, + allow_overwrite=False, + doc_name=doc.name, + doc_rev=doc.rev, + ) + except BlobExistsError: + pass # likely results from a race + return FileResponse( + filename.open("rb"), + filename=name, + content_type=_default_content_type(name), + ) + return HttpResponseNotFound(f"Object {bucket}:{name} not found.") + + @login_required def materials_editable_groups(request, num=None): meeting = get_meeting(num) diff --git a/ietf/settings.py b/ietf/settings.py index 9a213c1a73..466b7ea9d7 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -786,29 +786,29 @@ def skip_unreadable_post(record): # Storages for artifacts stored as blobs ARTIFACT_STORAGE_NAMES: list[str] = [ - "bofreq", - "charter", - "conflrev", "active-draft", - "draft", - "slides", - "minutes", "agenda", + "bibxml-ids", "bluesheets", - "procmaterials", - "narrativeminutes", - "statement", - "statchg", - "liai-att", + "bofreq", + "charter", "chatlog", - "polls", - "staging", - "bibxml-ids", - "indexes", + "conflrev", + "draft", "floorplan", + "indexes", + "liai-att", "meetinghostlogo", + "minutes", + "narrativeminutes", "photo", + "polls", + "procmaterials", "review", + "slides", + "staging", + "statchg", + "statement", ] for storagename in ARTIFACT_STORAGE_NAMES: STORAGES[storagename] = { From 99d7518cf583a4245eebaa819bc17ce692268d6c Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 14 Oct 2025 23:55:06 -0300 Subject: [PATCH 03/13] feat: render HTML from markdown via API (#9729) * chore: add comment * fix: allow bluesheets to be retrieved Normally not retrieved through /meeting/materials, but they're close enough in purpose that we might as well make them available. * fix: only update StoredObject.modified if changed * fix: preserve mtime when creating blob * refactor: better exception name * feat: render .md.html from .md blob * fix: explicit STATIC_IETF_ORG value in template Django's context_processors are not applied to render_string calls as we use them here, so settings are not available. * fix: typo * fix: decode utf-8 properly * feat: use filesystem to render .md.html --- ietf/doc/storage.py | 6 +- ietf/doc/storage_utils.py | 4 +- ietf/meeting/views.py | 161 ++++++++++++++++++++++++++++++------ ietf/templates/minimal.html | 4 +- 4 files changed, 146 insertions(+), 29 deletions(-) diff --git a/ietf/doc/storage.py b/ietf/doc/storage.py index c6851b14be..375620ccaf 100644 --- a/ietf/doc/storage.py +++ b/ietf/doc/storage.py @@ -140,7 +140,11 @@ def _save_stored_object(self, name, content) -> StoredObject: ), ), ) - if not created: + if not created and ( + record.sha384 != content.custom_metadata["sha384"] + or record.len != int(content.custom_metadata["len"]) + or record.deleted is not None + ): record.sha384 = content.custom_metadata["sha384"] record.len = int(content.custom_metadata["len"]) record.modified = now diff --git a/ietf/doc/storage_utils.py b/ietf/doc/storage_utils.py index a25dcb6305..81588c83ec 100644 --- a/ietf/doc/storage_utils.py +++ b/ietf/doc/storage_utils.py @@ -16,7 +16,7 @@ class StorageUtilsError(Exception): pass -class BlobExistsError(StorageUtilsError): +class AlreadyExistsError(StorageUtilsError): pass @@ -78,7 +78,7 @@ def store_file( # debug.show('f"Asked to store {name} in {kind}: is_new={is_new}, allow_overwrite={allow_overwrite}"') if not allow_overwrite and not is_new: debug.show('f"Failed to save {kind}:{name} - name already exists in store"') - raise BlobExistsError(f"Failed to save {kind}:{name} - name already exists in store") + raise AlreadyExistsError(f"Failed to save {kind}:{name} - name already exists in store") new_name = _get_storage(kind).save( name, StoredObjectFile( diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index b6fbc33fcc..1e25f2a80e 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -9,6 +9,8 @@ import json import math import os +from contextlib import suppress + import pytz import re import tarfile @@ -56,8 +58,13 @@ from ietf.doc.fields import SearchableDocumentsField from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent, StoredObject -from ietf.doc.storage_utils import remove_from_storage, retrieve_bytes, store_file, \ - exists_in_storage, BlobExistsError +from ietf.doc.storage_utils import ( + remove_from_storage, + retrieve_bytes, + store_file, + AlreadyExistsError, + store_str, store_bytes, +) from ietf.group.models import Group from ietf.group.utils import can_manage_session_materials, can_manage_some_groups, can_manage_group from ietf.person.models import Person, User @@ -337,6 +344,7 @@ def materials_document(request, document, num=None, ext=None): { "content": markdown.markdown(bytes.decode(encoding=chset)), "title": filename.name, + "static_ietf_org": settings.STATIC_IETF_ORG, }, ) content_type = content_type.replace("plain", "html", 1) @@ -488,8 +496,23 @@ def _response(bucket: str, name: str): @requires_api_token def api_retrieve_materials_blob(request, bucket, name): + """Retrieve contents of a meeting materials blob + + This is intended as a fallback if the web worker cannot retrieve a blob from + the blobstore itself. The most likely cause is retrieving an old materials document + that has not been backfilled. + + If a blob is requested that does not exist, this checks for it on the filesystem + and if found, adds it to the blobstore, creates a StoredObject record, and returns + the contents as it would have done if the blob was already present. + + As a special case, if a requested file with extension `.md.html` does not exist + but a file with the same name but extension `.md` does, `.md` file will be rendered + from markdown to html and returned / stored. + """ ALLOWED_BUCKETS = { "agenda", + "bluesheets", "chatlog", "minutes", "narrativeminutes", @@ -513,38 +536,128 @@ def _default_content_type(blob_name: str): assert isinstance(storage, BlobdbStorage) try: blob = storage.open(name, "rb") + except FileNotFoundError: + pass + else: + # found the blob - return it assert isinstance(blob, BlobFile) return FileResponse( blob, filename=name, content_type=blob.content_type or _default_content_type(name), ) - except FileNotFoundError: - # See if we have a meeting-related document that matches the request - try: - doc, rev = _get_materials_doc(Path(name).stem) + + # Did not find the blob. See if the filename is .md.html and, if so, see if we + # have the markdown. + name_as_path = Path(name) + if name_as_path.suffixes == [".md", ".html"]: + md_filename = name_as_path.stem + try: + md_file = storage.open(md_filename, "rb") + except FileNotFoundError: + pass + else: + md_src = md_file.read().decode("utf-8") + md_file.close() + # render the markdown + html = render_to_string( + "minimal.html", + { + "content": markdown.markdown(md_src), + "title": name_as_path.stem, + "static_ietf_org": settings.STATIC_IETF_ORG, + }, + ) + # Don't overwrite, but don't fail if the blob exists + with suppress(AlreadyExistsError): + store_str( + kind=bucket, + name=name, + content=html, + allow_overwrite=False, + # todo doc_name, + # todo doc_rev, + content_type="text/html;charset=utf-8", + ) + return HttpResponse(html) + # Didn't find .md as a blob, so check the filesystem. We do this here + # because we never write .md.html to the filesystem. + try: + # .stem.stem drops both extensions + doc, rev = _get_materials_doc(Path(md_filename).stem) except Document.DoesNotExist: pass else: - if doc.type_id == bucket and doc.get_base_name() == name: - filename = Path(doc.get_file_path()) / name - with filename.open("rb") as f: - try: - store_file( - kind=bucket, - name=name, - file=f, - allow_overwrite=False, - doc_name=doc.name, - doc_rev=doc.rev, - ) - except BlobExistsError: - pass # likely results from a race - return FileResponse( - filename.open("rb"), - filename=name, - content_type=_default_content_type(name), + if doc.type_id == bucket and doc.get_base_name() == md_filename: + filename = Path(doc.get_file_path()) / md_filename + md_bytes = filename.read_bytes() + # Don't overwrite, but don't fail if the blob exists + with suppress(AlreadyExistsError): + store_bytes( + kind=bucket, + name=md_filename, + content=md_bytes, + mtime=datetime.datetime.fromtimestamp( + filename.stat().st_mtime, + tz=datetime.UTC, + ), + allow_overwrite=False, + doc_name=doc.name, + doc_rev=doc.rev, + ) + # render the markdown + md_src = md_bytes.decode() + html = render_to_string( + "minimal.html", + { + "content": markdown.markdown(md_src), + "title": md_filename, + "static_ietf_org": settings.STATIC_IETF_ORG, + }, ) + # Don't overwrite, but don't fail if the blob exists + with suppress(AlreadyExistsError): + store_str( + kind=bucket, + name=name, + content=html, + allow_overwrite=False, + doc_name=doc.name, + doc_rev=doc.rev, + content_type="text/html;charset=utf-8", + ) + return HttpResponse(html) + + # See if we have a meeting-related document that matches the requested bucket and + # name. + try: + doc, rev = _get_materials_doc(Path(name).stem) + except Document.DoesNotExist: + pass + else: + if doc.type_id == bucket and doc.get_base_name() == name: + filename = Path(doc.get_file_path()) / name + with filename.open("rb") as f: + # Don't overwrite, but don't fail if the blob exists + with suppress(AlreadyExistsError): + store_file( + kind=bucket, + name=name, + file=f, + mtime=datetime.datetime.fromtimestamp( + filename.stat().st_mtime, + tz=datetime.UTC, + ), + allow_overwrite=False, + doc_name=doc.name, + doc_rev=doc.rev, + ) + return FileResponse( + filename.open("rb"), + filename=name, + content_type=_default_content_type(name), + ) + return HttpResponseNotFound(f"Object {bucket}:{name} not found.") diff --git a/ietf/templates/minimal.html b/ietf/templates/minimal.html index 87f661f501..15c432505e 100644 --- a/ietf/templates/minimal.html +++ b/ietf/templates/minimal.html @@ -9,8 +9,8 @@ {{ title }} - - + + {# load this in the head, to prevent flickering #} From 9708066548b441764f00105a3202112d7e35fac6 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 16 Oct 2025 12:05:23 -0300 Subject: [PATCH 04/13] fix: copy/paste error in api_resolve_materials_name --- ietf/meeting/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index 1e25f2a80e..8cc746a74b 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -483,7 +483,7 @@ def _response(bucket: str, name: str): name=pdf_blob.name, ) else: - pdf_filename = file_ext_choices[".pdf"] + pdf_filename = file_ext_choices[preferred_ext] return _response( bucket=doc.type_id, name=pdf_filename, From 7fd15801c7d072fbf468168bec863f1d079bcc1d Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 16 Oct 2025 20:43:55 -0300 Subject: [PATCH 05/13] refactor: get actual rev in _get_materials_doc (#9741) --- ietf/meeting/views.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index 8cc746a74b..fe6104db72 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -57,7 +57,8 @@ import debug # pyflakes:ignore from ietf.doc.fields import SearchableDocumentsField -from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent, StoredObject +from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent, \ + StoredObject, DocHistory from ietf.doc.storage_utils import ( remove_from_storage, retrieve_bytes, @@ -258,7 +259,7 @@ def current_materials(request): raise Http404('No such meeting') -def _get_materials_doc(name, meeting=None): +def _get_materials_doc(name, meeting=None) -> tuple[Document | DocHistory, str | None]: """Get meeting materials document named by name Raises Document.DoesNotExist if a match cannot be found. If meeting is None, @@ -279,7 +280,11 @@ def _matches_meeting(doc, meeting=None): if "-" in name: docname, rev = name.rsplit("-", 1) if len(rev) == 2 and rev.isdigit(): - doc = Document.objects.get(name=docname) # may raise Document.DoesNotExist + try: + doc = DocHistory.objects.get(name=docname, rev=rev) + except DocHistory.DoesNotExist: + # may raise Document.DoesNotExist + doc = Document.objects.get(name=docname, rev=rev) if ( _matches_meeting(doc, meeting) and rev in doc.revisions_by_newrevisionevent() From ac51b1374842fdc26a1e55e55ad351120fa3c795 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Fri, 17 Oct 2025 10:58:43 -0300 Subject: [PATCH 06/13] fix: return filename, not full path --- ietf/meeting/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index fe6104db72..af3fe4b643 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -466,7 +466,7 @@ def _response(bucket: str, name: str): filename = Path(doc.get_file_path()) / basename file_ext_choices = { # Construct a map from suffix to full filename - fn.suffix: str(fn) + fn.suffix: fn.name for fn in sorted(filename.parent.glob(filename.stem + ".*")) } From a5a8f7a3f9a20a7693da4511677b37f616683125 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Fri, 17 Oct 2025 18:39:20 -0300 Subject: [PATCH 07/13] feat: precompute blob lookups for meeting materials (#9746) * feat: ResolvedMaterial model + migration * feat: method to populate ResolvedMaterial (WIP) * refactor: don't delete ResolvedMaterials Instead of deleting the ResolvedMaterials for a meeting, which might lose updates made during processing, update existing rows with any changes and warn if anything changed during the process. * fix: fix _get_materials_doc() Did not handle the possibility of multiple DocHistory objects with the same rev. * refactor: factor out material lookup helper * feat: resolve blobs via blobdb/fs for cache * chore: add resource * feat: admin for ResolvedMaterial * feat: cache-driven resolve materials API * fix: add all ResolvedMaterials; var names * fix: handle null case * feat: resolve_meeting_materials_task --- ietf/api/urls.py | 1 + ietf/meeting/admin.py | 12 +- .../0017_resolvedmaterial_and_more.py | 48 +++++ ietf/meeting/models.py | 21 ++ ietf/meeting/resources.py | 31 ++- ietf/meeting/tasks.py | 25 ++- ietf/meeting/utils.py | 194 +++++++++++++++++- ietf/meeting/views.py | 151 ++++++-------- 8 files changed, 380 insertions(+), 103 deletions(-) create mode 100644 ietf/meeting/migrations/0017_resolvedmaterial_and_more.py diff --git a/ietf/api/urls.py b/ietf/api/urls.py index b6bf29588e..6c9740f115 100644 --- a/ietf/api/urls.py +++ b/ietf/api/urls.py @@ -51,6 +51,7 @@ url(r'^iesg/position', views_ballot.api_set_position), # Find the blob to store for a given materials document path url(r'^meeting/(?:(?P(?:interim-)?[a-z0-9-]+)/)?materials/%(document)s(?P\.[A-Za-z0-9]+)?/resolve/$' % settings.URL_REGEXPS, meeting_views.api_resolve_materials_name), + url(r'^meeting/(?:(?P(?:interim-)?[a-z0-9-]+)/)?materials/%(document)s(?P\.[A-Za-z0-9]+)?/resolve-cached/$' % settings.URL_REGEXPS, meeting_views.api_resolve_materials_name_cached), url(r'^meeting/blob/(?P[a-z0-9-]+)/(?P[a-z][a-z0-9.-]+)$', meeting_views.api_retrieve_materials_blob), # Let Meetecho set session video URLs url(r'^meeting/session/video/url$', meeting_views.api_set_session_video_url), diff --git a/ietf/meeting/admin.py b/ietf/meeting/admin.py index d886a9a4b6..248838df18 100644 --- a/ietf/meeting/admin.py +++ b/ietf/meeting/admin.py @@ -9,7 +9,7 @@ SchedTimeSessAssignment, ResourceAssociation, FloorPlan, UrlResource, SessionPresentation, ImportantDate, SlideSubmission, SchedulingEvent, BusinessConstraint, ProceedingsMaterial, MeetingHost, Registration, RegistrationTicket, - AttendanceTypeName) + AttendanceTypeName, ResolvedMaterial) class UrlResourceAdmin(admin.ModelAdmin): @@ -288,3 +288,13 @@ def display_meeting(self, instance): display_meeting.short_description = "Meeting" # type: ignore # https://github.com/python/mypy/issues/2087 admin.site.register(RegistrationTicket, RegistrationTicketAdmin) + + +class ResolvedMaterialAdmin(admin.ModelAdmin): + model = ResolvedMaterial + list_display = ["name", "meeting_number", "bucket", "blob"] + list_filter = ["meeting_number", "bucket"] + search_fields = ["name", "blob"] + ordering = ["name"] + +admin.site.register(ResolvedMaterial, ResolvedMaterialAdmin) diff --git a/ietf/meeting/migrations/0017_resolvedmaterial_and_more.py b/ietf/meeting/migrations/0017_resolvedmaterial_and_more.py new file mode 100644 index 0000000000..4a46c0c3ff --- /dev/null +++ b/ietf/meeting/migrations/0017_resolvedmaterial_and_more.py @@ -0,0 +1,48 @@ +# Copyright The IETF Trust 2025, All Rights Reserved + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("meeting", "0016_alter_meeting_country_alter_meeting_time_zone"), + ] + + operations = [ + migrations.CreateModel( + name="ResolvedMaterial", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(help_text="Name to resolve", max_length=300)), + ( + "meeting_number", + models.CharField( + help_text="Meeting material is related to", max_length=64 + ), + ), + ( + "bucket", + models.CharField(help_text="Resolved bucket name", max_length=255), + ), + ( + "blob", + models.CharField(help_text="Resolved blob name", max_length=300), + ), + ], + ), + migrations.AddConstraint( + model_name="resolvedmaterial", + constraint=models.UniqueConstraint( + fields=("name", "meeting_number"), name="unique_name_per_meeting" + ), + ), + ] diff --git a/ietf/meeting/models.py b/ietf/meeting/models.py index 9e44df33b7..c80544220b 100644 --- a/ietf/meeting/models.py +++ b/ietf/meeting/models.py @@ -956,6 +956,27 @@ class Meta: def __str__(self): return u"%s -> %s-%s" % (self.session, self.document.name, self.rev) + +class ResolvedMaterial(models.Model): + # A Document name can be 255 characters; allow this name to be a bit longer + name = models.CharField(max_length=300, help_text="Name to resolve") + meeting_number = models.CharField( + max_length=64, help_text="Meeting material is related to" + ) + bucket = models.CharField(max_length=255, help_text="Resolved bucket name") + blob = models.CharField(max_length=300, help_text="Resolved blob name") + + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["name", "meeting_number"], name="unique_name_per_meeting" + ) + ] + + def __str__(self): + return f"{self.name}@{self.meeting_number} -> {self.bucket}:{self.blob}" + + constraint_cache_uses = 0 constraint_cache_initials = 0 diff --git a/ietf/meeting/resources.py b/ietf/meeting/resources.py index ede2b5b993..57be4487b6 100644 --- a/ietf/meeting/resources.py +++ b/ietf/meeting/resources.py @@ -11,11 +11,15 @@ from ietf import api -from ietf.meeting.models import ( Meeting, ResourceAssociation, Constraint, Room, Schedule, Session, - TimeSlot, SchedTimeSessAssignment, SessionPresentation, FloorPlan, - UrlResource, ImportantDate, SlideSubmission, SchedulingEvent, - BusinessConstraint, ProceedingsMaterial, MeetingHost, Attended, - Registration, RegistrationTicket) +from ietf.meeting.models import (Meeting, ResourceAssociation, Constraint, Room, + Schedule, Session, + TimeSlot, SchedTimeSessAssignment, SessionPresentation, + FloorPlan, + UrlResource, ImportantDate, SlideSubmission, + SchedulingEvent, + BusinessConstraint, ProceedingsMaterial, MeetingHost, + Attended, + Registration, RegistrationTicket, ResolvedMaterial) from ietf.name.resources import MeetingTypeNameResource class MeetingResource(ModelResource): @@ -472,3 +476,20 @@ class Meta: "registration": ALL_WITH_RELATIONS, } api.meeting.register(RegistrationTicketResource()) + + +class ResolvedMaterialResource(ModelResource): + class Meta: + queryset = ResolvedMaterial.objects.all() + serializer = api.Serializer() + cache = SimpleCache() + #resource_name = 'resolvedmaterial' + ordering = ['id', ] + filtering = { + "id": ALL, + "name": ALL, + "meeting_number": ALL, + "bucket": ALL, + "blob": ALL, + } +api.meeting.register(ResolvedMaterialResource()) diff --git a/ietf/meeting/tasks.py b/ietf/meeting/tasks.py index 784eb00d87..d35f78979d 100644 --- a/ietf/meeting/tasks.py +++ b/ietf/meeting/tasks.py @@ -2,12 +2,14 @@ # # Celery task definitions # +import datetime + from celery import shared_task from django.utils import timezone from ietf.utils import log from .models import Meeting -from .utils import generate_proceedings_content +from .utils import generate_proceedings_content, resolve_materials_for_one_meeting from .views import generate_agenda_data from .utils import fetch_attendance_from_meetings @@ -61,3 +63,24 @@ def fetch_meeting_attendance_task(): meeting_stats['processed'] ) ) + + +@shared_task +def resolve_meeting_materials_task(*, meetings=None, meetings_since=None): + if meetings_since is not None: + meetings_since = datetime.datetime.fromisoformat(meetings_since) + if meetings is None: + if meetings_since is None: + log.log("No meetings requested, doing nothing.") + return + meetings = Meeting.objects.filter(date__gte=meetings_since) + log.log(f"Resolving materials for meetings since {meetings_since}") + else: + if meetings_since is not None: + log.log("Ignoring meetings_since because specific meetings were requested.") + meetings = Meeting.objects.filter(number__in=meetings) + for meeting in meetings: + log.log(f"Resolving materials for {meeting.type_id} meeting {meeting.number}...") + mark = timezone.now() + resolve_materials_for_one_meeting(meeting) + log.log(f"Resolved in {(timezone.now() - mark).total_seconds():0.3f} seconds.") diff --git a/ietf/meeting/utils.py b/ietf/meeting/utils.py index feadb0c7fd..1d9da72fce 100644 --- a/ietf/meeting/utils.py +++ b/ietf/meeting/utils.py @@ -2,6 +2,8 @@ # -*- coding: utf-8 -*- import datetime import itertools +from dataclasses import dataclass + import jsonschema import os import requests @@ -17,7 +19,7 @@ from django.contrib import messages from django.core.cache import caches from django.core.files.base import ContentFile -from django.db import IntegrityError +from django.db import IntegrityError, transaction from django.db.models import OuterRef, Subquery, TextField, Q, Value, Max from django.db.models.functions import Coalesce from django.template.loader import render_to_string @@ -28,10 +30,27 @@ from ietf.dbtemplate.models import DBTemplate from ietf.doc.storage_utils import store_bytes, store_str -from ietf.meeting.models import (Session, SchedulingEvent, TimeSlot, - Constraint, SchedTimeSessAssignment, SessionPresentation, Attended, - Registration, Meeting, RegistrationTicket) -from ietf.doc.models import Document, State, NewRevisionDocEvent, StateDocEvent +from ietf.meeting.models import ( + Session, + SchedulingEvent, + TimeSlot, + Constraint, + SchedTimeSessAssignment, + SessionPresentation, + Attended, + Registration, + Meeting, + RegistrationTicket, + ResolvedMaterial, +) +from ietf.doc.models import ( + Document, + State, + NewRevisionDocEvent, + StateDocEvent, + DocHistory, + StoredObject, +) from ietf.doc.models import DocEvent from ietf.group.models import Group from ietf.group.utils import can_manage_materials @@ -833,6 +852,171 @@ def write_doc_for_session(session, type_id, filename, contents): store_str(type_id, filename.name, contents) return None + +@dataclass +class BlobSpec: + bucket: str + name: str + + +def resolve_one_material( + doc: Document | DocHistory, rev: str | None, ext: str | None +) -> BlobSpec | None: + # Get the Document's base name. It may or may not have an extension. + if rev is None: + basename = Path(doc.get_base_name()) + else: + basename = Path(f"{doc.name}-{int(rev):02d}") + + # If we have an extension, either from the URL or the Document's base name, look up + # the blob or file or return 404. + if ext or basename.suffix != "": + if ext: + basename = basename.with_suffix(ext) + + # See if we have a stored object under that name + preferred_blob = ( + StoredObject.objects.exclude_deleted() + .filter(store=doc.type_id, name=basename) + .first() + ) + if preferred_blob is not None: + return BlobSpec( + bucket=preferred_blob.store, + name=preferred_blob.name, + ) + # No stored object, fall back to the file system. + filename = Path(doc.get_file_path()) / basename + if filename.exists(): + return BlobSpec( + bucket=doc.type_id, + name=str(basename), + ) + else: + return None + + # No extension has been specified so far, so look one up. + matching_stored_objects = ( + StoredObject.objects.exclude_deleted() + .filter( + store=doc.type_id, + name__startswith=f"{basename.stem}.", # anchor to end with trailing "." + ) + .order_by("name") + ) # orders by suffix + blob_ext_choices = { + Path(stored_obj.name).suffix: stored_obj + for stored_obj in matching_stored_objects + } + + # Short-circuit to return pdf if present + if ".pdf" in blob_ext_choices: + pdf_blob = blob_ext_choices[".pdf"] + return BlobSpec( + bucket=pdf_blob.store, + name=pdf_blob.name, + ) + + # Now look for files + filename = Path(doc.get_file_path()) / basename + file_ext_choices = { + # Construct a map from suffix to full filename + fn.suffix: fn.name + for fn in sorted(filename.parent.glob(filename.stem + ".*")) + } + + # Short-circuit to return pdf if we have the file + if ".pdf" in file_ext_choices: + pdf_filename = file_ext_choices[".pdf"] + return BlobSpec( + bucket=doc.type_id, + name=pdf_filename, + ) + + all_exts = set(blob_ext_choices.keys()).union(file_ext_choices.keys()) + if len(all_exts) > 0: + preferred_ext = sorted(all_exts)[0] + if preferred_ext in blob_ext_choices: + preferred_blob = blob_ext_choices[preferred_ext] + return BlobSpec( + bucket=preferred_blob.store, + name=preferred_blob.name, + ) + else: + preferred_filename = file_ext_choices[preferred_ext] + return BlobSpec( + bucket=doc.type_id, + name=preferred_filename, + ) + + return None + + +def resolve_materials_for_one_meeting(meeting: Meeting): + start_time = timezone.now() + meeting_documents = ( + Document.objects.exclude(type_id="draft").filter( + Q(session__meeting=meeting) | Q(proceedingsmaterial__meeting=meeting) + ) + ).distinct() + + resolved = [] + for doc in meeting_documents: + # request by doc name with no rev + blob = resolve_one_material(doc, rev=None, ext=None) + if blob is not None: + resolved.append( + ResolvedMaterial( + name=doc.name, + meeting_number=meeting.number, + bucket=blob.bucket, + blob=blob.name, + ) + ) + # request by doc name + rev + blob = resolve_one_material(doc, rev=doc.rev, ext=None) + if blob is not None: + resolved.append( + ResolvedMaterial( + name=f"{doc.name}-{doc.rev:02}", + meeting_number=meeting.number, + bucket=blob.bucket, + blob=blob.name, + ) + ) + # for other revisions, only need request by doc name + rev + other_revisions = doc.revisions_by_newrevisionevent() + other_revisions.remove(doc.rev) + for rev in other_revisions: + old_doc = DocHistory.objects.filter( + doc=doc, rev=rev + ).order_by("-time").first() + if old_doc is None: + continue + blob = resolve_one_material(old_doc, rev=rev, ext=None) + if blob is not None: + resolved.append( + ResolvedMaterial( + name=f"{doc.name}-{rev:02}", + meeting_number=meeting.number, + bucket=blob.bucket, + blob=blob.name, + ) + ) + ResolvedMaterial.objects.bulk_create( + resolved, + update_conflicts=True, + unique_fields=["name", "meeting_number"], + update_fields=["bucket", "blob"], + ) + # Warn if any files were updated during the above process + last_update = meeting_documents.aggregate(Max("time"))["time__max"] + if last_update and last_update > start_time: + log( + f"Warning: materials for meeting {meeting.number} " + "changed during ResolvedMaterial update" + ) + def create_recording(session, url, title=None, user=None): ''' Creates the Document type=recording, setting external_url and creating diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index af3fe4b643..271f66755a 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -52,13 +52,12 @@ from django.views.decorators.cache import cache_page from django.views.decorators.csrf import ensure_csrf_cookie, csrf_exempt from django.views.generic import RedirectView -from rest_framework.status import HTTP_404_NOT_FOUND, HTTP_400_BAD_REQUEST +from rest_framework.status import HTTP_404_NOT_FOUND import debug # pyflakes:ignore from ietf.doc.fields import SearchableDocumentsField -from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent, \ - StoredObject, DocHistory +from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent, DocHistory from ietf.doc.storage_utils import ( remove_from_storage, retrieve_bytes, @@ -71,7 +70,8 @@ from ietf.person.models import Person, User from ietf.ietfauth.utils import role_required, has_role, user_is_person from ietf.mailtrigger.utils import gather_address_lists -from ietf.meeting.models import Meeting, Session, Schedule, FloorPlan, SessionPresentation, TimeSlot, SlideSubmission, Attended +from ietf.meeting.models import Meeting, Session, Schedule, FloorPlan, \ + SessionPresentation, TimeSlot, SlideSubmission, Attended, ResolvedMaterial from ietf.meeting.models import ImportantDate, SessionStatusName, SchedulingEvent, SchedTimeSessAssignment, Room, TimeSlotTypeName from ietf.meeting.models import Registration from ietf.meeting.forms import ( CustomDurationField, SwapDaysForm, SwapTimeslotsForm, ImportMinutesForm, @@ -96,6 +96,7 @@ generate_proceedings_content, organize_proceedings_sessions, sort_accept_tuple, + resolve_one_material, ) from ietf.meeting.utils import add_event_info_to_session_qs from ietf.meeting.utils import session_time_for_sorting @@ -281,10 +282,14 @@ def _matches_meeting(doc, meeting=None): docname, rev = name.rsplit("-", 1) if len(rev) == 2 and rev.isdigit(): try: - doc = DocHistory.objects.get(name=docname, rev=rev) - except DocHistory.DoesNotExist: # may raise Document.DoesNotExist doc = Document.objects.get(name=docname, rev=rev) + except Document.DoesNotExist: + doc = DocHistory.objects.filter( + name=docname, rev=rev, + ).order_by("-time").first() + if doc is None: + raise if ( _matches_meeting(doc, meeting) and rev in doc.revisions_by_newrevisionevent() @@ -367,16 +372,17 @@ def materials_document(request, document, num=None, ext=None): @requires_api_token def api_resolve_materials_name(request, document, num=None, ext=None): """Resolve materials name into document to a blob spec - + Returns the bucket/name of a blob in the blob store that corresponds to the named document. Handles resolution of revision if it is not specified and determines the best extension if one is not provided. Response is JSON. - + As of 2025-10-10 we do not have blobs for all materials documents or for every format of every document. This API still returns the bucket/name as if the blob exists. Another API will allow the caller to obtain the file contents using that name if it cannot be retrieved from the blob store. """ + def _error_response(status: int, detail: str): return JsonResponse( { @@ -386,7 +392,7 @@ def _error_response(status: int, detail: str): }, status=status, ) - + def _response(bucket: str, name: str): return JsonResponse( { @@ -410,93 +416,56 @@ def _response(bucket: str, name: str): HTTP_404_NOT_FOUND, f"No such document for meeting {num}" ) - # Get the Document's base name. It may or may not have an extension. - if rev is None: - basename = Path(doc.get_base_name()) - else: - basename = Path(f"{doc.name}-{int(rev):02d}") + resolved = resolve_one_material(doc, rev, ext) + if resolved is not None: + return _response(bucket=resolved.bucket, name=resolved.name) - # If we have an extension, either from the URL or the Document's base name, look up - # the blob or file or return 404. - if ext or basename.suffix != "": - if ext: - basename = basename.with_suffix(ext) - - # See if we have a stored object under that name - blob = StoredObject.objects.exclude_deleted().filter( - store=doc.type_id, name=basename - ).first() - if blob is not None: - return _response( - bucket=blob.store, - name=blob.name, - ) - # No stored object, fall back to the file system. - filename = Path(doc.get_file_path()) / basename - if filename.exists(): - return _response( - bucket=doc.type_id, - name=str(basename), - ) - else: - return _error_response( - HTTP_404_NOT_FOUND, - f"No file {basename} available for {document} for meeting {num}", - ) - - # No extension has been specified so far, so look one up. - matching_stored_objects = StoredObject.objects.exclude_deleted().filter( - store=doc.type_id, - name__startswith=f"{basename.stem}." # anchor to end with trailing "." - ).order_by("name") # orders by suffix - blob_ext_choices = { - Path(stored_obj.name).suffix: stored_obj - for stored_obj in matching_stored_objects - } - - # Short-circuit to return pdf if present - if ".pdf" in blob_ext_choices: - pdf_blob = blob_ext_choices[".pdf"] - return _response( - bucket=pdf_blob.store, - name=pdf_blob.name, + return _error_response( + HTTP_404_NOT_FOUND, f"No suitable file for {document} for meeting {num}" + ) + + +@requires_api_token("ietf.meeting.views.api_resolve_materials_name") +def api_resolve_materials_name_cached(request, document, num=None, ext=None): + """Resolve materials name into document to a blob spec + + Returns the bucket/name of a blob in the blob store that corresponds to the named + document. Handles resolution of revision if it is not specified and determines the + best extension if one is not provided. Response is JSON. + + As of 2025-10-10 we do not have blobs for all materials documents or for every + format of every document. This API still returns the bucket/name as if the blob + exists. Another API will allow the caller to obtain the file contents using that + name if it cannot be retrieved from the blob store. + """ + + def _error_response(status: int, detail: str): + return JsonResponse( + { + "status": status, + "title": "Error", + "detail": detail, + }, + status=status, ) - # Now look for files - filename = Path(doc.get_file_path()) / basename - file_ext_choices = { - # Construct a map from suffix to full filename - fn.suffix: fn.name - for fn in sorted(filename.parent.glob(filename.stem + ".*")) - } - - # Short-circuit to return pdf if we have the file - if ".pdf" in file_ext_choices: - pdf_filename = file_ext_choices[".pdf"] - return _response( - bucket=doc.type_id, - name=pdf_filename, + def _response(bucket: str, name: str): + return JsonResponse( + { + "bucket": bucket, + "name": name, + } ) - - all_exts = set(blob_ext_choices.keys()).union(file_ext_choices.keys()) - if len(all_exts) > 0: - preferred_ext = sorted(all_exts)[0] - if preferred_ext in blob_ext_choices: - pdf_blob = blob_ext_choices[preferred_ext] - return _response( - bucket=pdf_blob.store, - name=pdf_blob.name, - ) - else: - pdf_filename = file_ext_choices[preferred_ext] - return _response( - bucket=doc.type_id, - name=pdf_filename, - ) - return _error_response( - HTTP_404_NOT_FOUND, f"No suitable file for {document} for meeting {num}" - ) + try: + resolved = ResolvedMaterial.objects.get( + meeting_number=num, name=document + ) + except ResolvedMaterial.DoesNotExist: + return _error_response( + HTTP_404_NOT_FOUND, f"No suitable file for {document} for meeting {num}" + ) + return _response(bucket=resolved.bucket, name=resolved.blob) @requires_api_token From ed8505c5f6196bb791c33a2b802e370434306e64 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Mon, 20 Oct 2025 14:23:49 -0500 Subject: [PATCH 08/13] feat: update resolver cache on material upload (#9759) --- ietf/doc/views_material.py | 4 ++++ ietf/meeting/utils.py | 33 ++++++++++++++++++++++++++++++++- ietf/meeting/views.py | 8 ++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/ietf/doc/views_material.py b/ietf/doc/views_material.py index 6f8b8a8f12..eefac0ca61 100644 --- a/ietf/doc/views_material.py +++ b/ietf/doc/views_material.py @@ -22,6 +22,7 @@ from ietf.doc.utils import add_state_change_event, check_common_doc_name_rules from ietf.group.models import Group from ietf.group.utils import can_manage_materials +from ietf.meeting.utils import resolve_uploaded_material from ietf.utils import log from ietf.utils.decorators import ignore_view_kwargs from ietf.utils.meetecho import MeetechoAPIError, SlidesManager @@ -179,6 +180,9 @@ def edit_material(request, name=None, acronym=None, action=None, doc_type=None): "There was an error creating a hardlink at %s pointing to %s: %s" % (ftp_filepath, filepath, ex) ) + else: + for meeting in set([s.meeting for s in doc.session_set.all()]): + resolve_uploaded_material(meeting=meeting, doc=doc) if prev_rev != doc.rev: e = NewRevisionDocEvent(type="new_revision", doc=doc, rev=doc.rev) diff --git a/ietf/meeting/utils.py b/ietf/meeting/utils.py index 1d9da72fce..1d9a622003 100644 --- a/ietf/meeting/utils.py +++ b/ietf/meeting/utils.py @@ -19,7 +19,7 @@ from django.contrib import messages from django.core.cache import caches from django.core.files.base import ContentFile -from django.db import IntegrityError, transaction +from django.db import IntegrityError from django.db.models import OuterRef, Subquery, TextField, Q, Value, Max from django.db.models.functions import Coalesce from django.template.loader import render_to_string @@ -240,6 +240,7 @@ def save_bluesheet(request, session, file, encoding='utf-8'): save_error = handle_upload_file(file, filename, session.meeting, 'bluesheets', request=request, encoding=encoding) if not save_error: doc.save_with_history([e]) + resolve_uploaded_material(meeting=session.meeting, doc=doc) return save_error @@ -1017,6 +1018,36 @@ def resolve_materials_for_one_meeting(meeting: Meeting): "changed during ResolvedMaterial update" ) +def resolve_uploaded_material(meeting: Meeting, doc: Document): + resolved = [] + blob = resolve_one_material(doc, rev=None, ext=None) + if blob is not None: + resolved.append( + ResolvedMaterial( + name=doc.name, + meeting_number=meeting.number, + bucket=blob.bucket, + blob=blob.name, + ) + ) + # request by doc name + rev + blob = resolve_one_material(doc, rev=doc.rev, ext=None) + if blob is not None: + resolved.append( + ResolvedMaterial( + name=f"{doc.name}-{doc.rev:02}", + meeting_number=meeting.number, + bucket=blob.bucket, + blob=blob.name, + ) + ) + ResolvedMaterial.objects.bulk_create( + resolved, + update_conflicts=True, + unique_fields=["name", "meeting_number"], + update_fields=["bucket", "blob"], + ) + def create_recording(session, url, title=None, user=None): ''' Creates the Document type=recording, setting external_url and creating diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index 271f66755a..ff315313de 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -95,6 +95,7 @@ finalize, generate_proceedings_content, organize_proceedings_sessions, + resolve_uploaded_material, sort_accept_tuple, resolve_one_material, ) @@ -3250,6 +3251,7 @@ def upload_session_minutes(request, session_id, num): form.add_error(None, str(err)) else: # no exception -- success! + resolve_uploaded_material(meeting=session.meeting, doc=session.minutes()) messages.success(request, f'Successfully uploaded minutes as revision {session.minutes().rev}.') return redirect('ietf.meeting.views.session_details', num=num, acronym=session.group.acronym) else: @@ -3309,6 +3311,7 @@ def upload_session_narrativeminutes(request, session_id, num): form.add_error(None, str(err)) else: # no exception -- success! + resolve_uploaded_material(meeting=session.meeting, doc=session.narrative_minutes()) messages.success(request, f'Successfully uploaded narrative minutes as revision {session.narrative_minutes().rev}.') return redirect('ietf.meeting.views.session_details', num=num, acronym=session.group.acronym) else: @@ -3455,6 +3458,7 @@ def upload_session_agenda(request, session_id, num): form.add_error(None, save_error) else: doc.save_with_history([e]) + resolve_uploaded_material(meeting=session.meeting, doc=doc) messages.success(request, f'Successfully uploaded agenda as revision {doc.rev}.') return redirect('ietf.meeting.views.session_details',num=num,acronym=session.group.acronym) else: @@ -3638,6 +3642,7 @@ def upload_session_slides(request, session_id, num, name=None): else: doc.save_with_history([e]) post_process(doc) + resolve_uploaded_material(meeting=session.meeting, doc=doc) # Send MeetEcho updates even if we had a problem saving - that will keep it in sync with the # SessionPresentation, which was already saved regardless of problems saving the file. @@ -5038,6 +5043,7 @@ def err(code, text): write_doc_for_session(session, 'chatlog', filename, json.dumps(apidata['chatlog'])) e = NewRevisionDocEvent.objects.create(doc=doc, rev=doc.rev, by=request.user.person, type='new_revision', desc='New revision available: %s'%doc.rev) doc.save_with_history([e]) + resolve_uploaded_material(meeting=session.meeting, doc=doc) return HttpResponse( "Done", status=200, @@ -5086,6 +5092,7 @@ def err(code, text): write_doc_for_session(session, 'polls', filename, json.dumps(apidata['polls'])) e = NewRevisionDocEvent.objects.create(doc=doc, rev=doc.rev, by=request.user.person, type='new_revision', desc='New revision available: %s'%doc.rev) doc.save_with_history([e]) + resolve_uploaded_material(meeting=session.meeting, doc=doc) return HttpResponse( "Done", status=200, @@ -5468,6 +5475,7 @@ def approve_proposed_slides(request, slidesubmission_id, num): doc.store_bytes(target_filename, retrieve_bytes("staging", submission.filename)) remove_from_storage("staging", submission.filename) post_process(doc) + resolve_uploaded_material(meeting=submission.session.meeting, doc=doc) DocEvent.objects.create(type="approved_slides", doc=doc, rev=doc.rev, by=request.user.person, desc="Slides approved") # update meetecho slide info if configured From cf008eaae04ec061845bf5475c901221ebd6a6b8 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Mon, 20 Oct 2025 16:24:10 -0300 Subject: [PATCH 09/13] feat: robustness + date range for resolve materials task (#9760) * fix: limit types added to ResolvedMaterial * feat: resolve meeting materials in order by date * feat: add meetings_until param * fix: log&continue if resolving fails on a meeting * feat: log error message on parse errors --- ietf/meeting/tasks.py | 69 +++++++++++++++++++++++++++++++++++++------ ietf/meeting/utils.py | 4 ++- ietf/meeting/views.py | 15 +++------- ietf/settings.py | 14 +++++++++ 4 files changed, 81 insertions(+), 21 deletions(-) diff --git a/ietf/meeting/tasks.py b/ietf/meeting/tasks.py index d35f78979d..d67c1c2869 100644 --- a/ietf/meeting/tasks.py +++ b/ietf/meeting/tasks.py @@ -66,21 +66,72 @@ def fetch_meeting_attendance_task(): @shared_task -def resolve_meeting_materials_task(*, meetings=None, meetings_since=None): - if meetings_since is not None: - meetings_since = datetime.datetime.fromisoformat(meetings_since) +def resolve_meeting_materials_task( + *, meetings: list[str]=None, meetings_since: str=None, meetings_until: str=None +): + """Run materials resolver on meetings + + Can request a set of meetings by number by passing a list in the meetings arg, or + by range by passing an iso-format timestamps in meetings_since / meetings_until. + To select all meetings, set meetings_since="zero" and omit other parameters. + """ + # IETF-1 = 1986-01-16 + EARLIEST_MEETING_DATE = datetime.datetime(1986, 1, 1) + if meetings_since == "zero": + meetings_since = EARLIEST_MEETING_DATE + elif meetings_since is not None: + try: + meetings_since = datetime.datetime.fromisoformat(meetings_since) + except ValueError: + log.log( + "Failed to parse meetings_since='{meetings_since}' with fromisoformat" + ) + raise + + if meetings_until is not None: + try: + meetings_until = datetime.datetime.fromisoformat(meetings_until) + except ValueError: + log.log( + "Failed to parse meetings_until='{meetings_until}' with fromisoformat" + ) + raise + if meetings_since is None: + # if we only got meetings_until, start from the first meeting + meetings_since = EARLIEST_MEETING_DATE + if meetings is None: if meetings_since is None: log.log("No meetings requested, doing nothing.") return meetings = Meeting.objects.filter(date__gte=meetings_since) - log.log(f"Resolving materials for meetings since {meetings_since}") + if meetings_until is not None: + meetings = meetings.filter(date__lte=meetings_until) + log.log( + "Resolving materials for meetings " + f"between {meetings_since} and {meetings_until}" + ) + else: + log.log(f"Resolving materials for meetings since {meetings_since}") else: if meetings_since is not None: - log.log("Ignoring meetings_since because specific meetings were requested.") + log.log( + "Ignoring meetings_since and meetings_until " + "because specific meetings were requested." + ) meetings = Meeting.objects.filter(number__in=meetings) - for meeting in meetings: - log.log(f"Resolving materials for {meeting.type_id} meeting {meeting.number}...") + for meeting in meetings.order_by("date"): + log.log( + f"Resolving materials for {meeting.type_id} " + f"meeting {meeting.number} ({meeting.date})..." + ) mark = timezone.now() - resolve_materials_for_one_meeting(meeting) - log.log(f"Resolved in {(timezone.now() - mark).total_seconds():0.3f} seconds.") + try: + resolve_materials_for_one_meeting(meeting) + except Exception as err: + log.log( + "Exception raised while resolving materials for " + f"meeting {meeting.number}: {err}" + ) + else: + log.log(f"Resolved in {(timezone.now() - mark).total_seconds():0.3f} seconds.") diff --git a/ietf/meeting/utils.py b/ietf/meeting/utils.py index 1d9a622003..bf56a8be73 100644 --- a/ietf/meeting/utils.py +++ b/ietf/meeting/utils.py @@ -956,7 +956,9 @@ def resolve_one_material( def resolve_materials_for_one_meeting(meeting: Meeting): start_time = timezone.now() meeting_documents = ( - Document.objects.exclude(type_id="draft").filter( + Document.objects.filter( + type_id__in=settings.MATERIALS_TYPES_SERVED_BY_WORKER + ).filter( Q(session__meeting=meeting) | Q(proceedingsmaterial__meeting=meeting) ) ).distinct() diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index ff315313de..35ec2a2f44 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -485,16 +485,6 @@ def api_retrieve_materials_blob(request, bucket, name): but a file with the same name but extension `.md` does, `.md` file will be rendered from markdown to html and returned / stored. """ - ALLOWED_BUCKETS = { - "agenda", - "bluesheets", - "chatlog", - "minutes", - "narrativeminutes", - "polls", - "procmaterials", - "slides", - } DEFAULT_CONTENT_TYPES = { ".html": "text/html;charset=utf-8", ".md": "text/markdown;charset=utf-8", @@ -505,7 +495,10 @@ def api_retrieve_materials_blob(request, bucket, name): def _default_content_type(blob_name: str): return DEFAULT_CONTENT_TYPES.get(Path(name).suffix, "application/octet-stream") - if not settings.ENABLE_BLOBSTORAGE or bucket not in ALLOWED_BUCKETS: + if not ( + settings.ENABLE_BLOBSTORAGE + and bucket in settings.MATERIALS_TYPES_SERVED_BY_WORKER + ): return HttpResponseNotFound(f"Bucket {bucket} not found.") storage = storages[bucket] # if not configured, a server error will result assert isinstance(storage, BlobdbStorage) diff --git a/ietf/settings.py b/ietf/settings.py index 466b7ea9d7..450e2dc7fa 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -816,6 +816,20 @@ def skip_unreadable_post(record): "OPTIONS": {"bucket_name": storagename}, } +# Buckets / doc types of meeting materials the CF worker is allowed to serve. This +# differs from the list in Session.meeting_related() by the omission of "recording" +MATERIALS_TYPES_SERVED_BY_WORKER = [ + "agenda", + "bluesheets", + "chatlog", + "minutes", + "narrativeminutes", + "polls", + "procmaterials", + "slides", +] + + # Override this in settings_local.py if needed # *_PATH variables ends with a slash/ . From c02e4f9e09a44f21eaca30b18394cecf7c6364bf Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Mon, 20 Oct 2025 16:57:11 -0300 Subject: [PATCH 10/13] refactor: move ResolvedMaterial to blobdb app (#9762) * refactor: move ResolvedMaterial to blobdb app * fix: undo accidental removal --- ietf/blobdb/admin.py | 11 +++++++++- .../migrations/0002_resolvedmaterial.py} | 2 +- ietf/blobdb/models.py | 20 +++++++++++++++++++ ietf/meeting/admin.py | 12 +---------- ietf/meeting/models.py | 20 ------------------- ietf/meeting/resources.py | 19 +----------------- ietf/meeting/utils.py | 2 +- ietf/meeting/views.py | 3 ++- 8 files changed, 36 insertions(+), 53 deletions(-) rename ietf/{meeting/migrations/0017_resolvedmaterial_and_more.py => blobdb/migrations/0002_resolvedmaterial.py} (95%) diff --git a/ietf/blobdb/admin.py b/ietf/blobdb/admin.py index f4cd002e07..3e1a2a311f 100644 --- a/ietf/blobdb/admin.py +++ b/ietf/blobdb/admin.py @@ -3,7 +3,7 @@ from django.db.models.functions import Length from rangefilter.filters import DateRangeQuickSelectListFilterBuilder -from .models import Blob +from .models import Blob, ResolvedMaterial @admin.register(Blob) @@ -29,3 +29,12 @@ def get_queryset(self, request): def object_size(self, instance): """Get the size of the object""" return instance.object_size # annotation added in get_queryset() + + +@admin.register(ResolvedMaterial) +class ResolvedMaterialAdmin(admin.ModelAdmin): + model = ResolvedMaterial + list_display = ["name", "meeting_number", "bucket", "blob"] + list_filter = ["meeting_number", "bucket"] + search_fields = ["name", "blob"] + ordering = ["name"] diff --git a/ietf/meeting/migrations/0017_resolvedmaterial_and_more.py b/ietf/blobdb/migrations/0002_resolvedmaterial.py similarity index 95% rename from ietf/meeting/migrations/0017_resolvedmaterial_and_more.py rename to ietf/blobdb/migrations/0002_resolvedmaterial.py index 4a46c0c3ff..2fcfdbca4c 100644 --- a/ietf/meeting/migrations/0017_resolvedmaterial_and_more.py +++ b/ietf/blobdb/migrations/0002_resolvedmaterial.py @@ -6,7 +6,7 @@ class Migration(migrations.Migration): dependencies = [ - ("meeting", "0016_alter_meeting_country_alter_meeting_time_zone"), + ("blobdb", "0001_initial"), ] operations = [ diff --git a/ietf/blobdb/models.py b/ietf/blobdb/models.py index 8f423d9f6c..fa7831f203 100644 --- a/ietf/blobdb/models.py +++ b/ietf/blobdb/models.py @@ -96,3 +96,23 @@ def _emit_blob_change_event(self, using=None): ), using=using, ) + + +class ResolvedMaterial(models.Model): + # A Document name can be 255 characters; allow this name to be a bit longer + name = models.CharField(max_length=300, help_text="Name to resolve") + meeting_number = models.CharField( + max_length=64, help_text="Meeting material is related to" + ) + bucket = models.CharField(max_length=255, help_text="Resolved bucket name") + blob = models.CharField(max_length=300, help_text="Resolved blob name") + + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["name", "meeting_number"], name="unique_name_per_meeting" + ) + ] + + def __str__(self): + return f"{self.name}@{self.meeting_number} -> {self.bucket}:{self.blob}" diff --git a/ietf/meeting/admin.py b/ietf/meeting/admin.py index 248838df18..d886a9a4b6 100644 --- a/ietf/meeting/admin.py +++ b/ietf/meeting/admin.py @@ -9,7 +9,7 @@ SchedTimeSessAssignment, ResourceAssociation, FloorPlan, UrlResource, SessionPresentation, ImportantDate, SlideSubmission, SchedulingEvent, BusinessConstraint, ProceedingsMaterial, MeetingHost, Registration, RegistrationTicket, - AttendanceTypeName, ResolvedMaterial) + AttendanceTypeName) class UrlResourceAdmin(admin.ModelAdmin): @@ -288,13 +288,3 @@ def display_meeting(self, instance): display_meeting.short_description = "Meeting" # type: ignore # https://github.com/python/mypy/issues/2087 admin.site.register(RegistrationTicket, RegistrationTicketAdmin) - - -class ResolvedMaterialAdmin(admin.ModelAdmin): - model = ResolvedMaterial - list_display = ["name", "meeting_number", "bucket", "blob"] - list_filter = ["meeting_number", "bucket"] - search_fields = ["name", "blob"] - ordering = ["name"] - -admin.site.register(ResolvedMaterial, ResolvedMaterialAdmin) diff --git a/ietf/meeting/models.py b/ietf/meeting/models.py index c80544220b..1af553a188 100644 --- a/ietf/meeting/models.py +++ b/ietf/meeting/models.py @@ -957,26 +957,6 @@ def __str__(self): return u"%s -> %s-%s" % (self.session, self.document.name, self.rev) -class ResolvedMaterial(models.Model): - # A Document name can be 255 characters; allow this name to be a bit longer - name = models.CharField(max_length=300, help_text="Name to resolve") - meeting_number = models.CharField( - max_length=64, help_text="Meeting material is related to" - ) - bucket = models.CharField(max_length=255, help_text="Resolved bucket name") - blob = models.CharField(max_length=300, help_text="Resolved blob name") - - class Meta: - constraints = [ - models.UniqueConstraint( - fields=["name", "meeting_number"], name="unique_name_per_meeting" - ) - ] - - def __str__(self): - return f"{self.name}@{self.meeting_number} -> {self.bucket}:{self.blob}" - - constraint_cache_uses = 0 constraint_cache_initials = 0 diff --git a/ietf/meeting/resources.py b/ietf/meeting/resources.py index 57be4487b6..88562a88fe 100644 --- a/ietf/meeting/resources.py +++ b/ietf/meeting/resources.py @@ -19,7 +19,7 @@ SchedulingEvent, BusinessConstraint, ProceedingsMaterial, MeetingHost, Attended, - Registration, RegistrationTicket, ResolvedMaterial) + Registration, RegistrationTicket) from ietf.name.resources import MeetingTypeNameResource class MeetingResource(ModelResource): @@ -476,20 +476,3 @@ class Meta: "registration": ALL_WITH_RELATIONS, } api.meeting.register(RegistrationTicketResource()) - - -class ResolvedMaterialResource(ModelResource): - class Meta: - queryset = ResolvedMaterial.objects.all() - serializer = api.Serializer() - cache = SimpleCache() - #resource_name = 'resolvedmaterial' - ordering = ['id', ] - filtering = { - "id": ALL, - "name": ALL, - "meeting_number": ALL, - "bucket": ALL, - "blob": ALL, - } -api.meeting.register(ResolvedMaterialResource()) diff --git a/ietf/meeting/utils.py b/ietf/meeting/utils.py index bf56a8be73..719c604e70 100644 --- a/ietf/meeting/utils.py +++ b/ietf/meeting/utils.py @@ -41,8 +41,8 @@ Registration, Meeting, RegistrationTicket, - ResolvedMaterial, ) +from ietf.blobdb.models import ResolvedMaterial from ietf.doc.models import ( Document, State, diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index 35ec2a2f44..38696a51b9 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -71,7 +71,8 @@ from ietf.ietfauth.utils import role_required, has_role, user_is_person from ietf.mailtrigger.utils import gather_address_lists from ietf.meeting.models import Meeting, Session, Schedule, FloorPlan, \ - SessionPresentation, TimeSlot, SlideSubmission, Attended, ResolvedMaterial + SessionPresentation, TimeSlot, SlideSubmission, Attended +from ..blobdb.models import ResolvedMaterial from ietf.meeting.models import ImportantDate, SessionStatusName, SchedulingEvent, SchedTimeSessAssignment, Room, TimeSlotTypeName from ietf.meeting.models import Registration from ietf.meeting.forms import ( CustomDurationField, SwapDaysForm, SwapTimeslotsForm, ImportMinutesForm, From 8a2175e384168a8e5ebc1c7ceae1f101dfceef41 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 21 Oct 2025 15:45:45 -0300 Subject: [PATCH 11/13] chore: fix lint (#9767) --- ietf/meeting/tasks.py | 36 +++++++++++++++++++++--------------- ietf/meeting/utils.py | 4 ++++ ietf/meeting/views.py | 2 +- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/ietf/meeting/tasks.py b/ietf/meeting/tasks.py index d67c1c2869..f5186e6bc2 100644 --- a/ietf/meeting/tasks.py +++ b/ietf/meeting/tasks.py @@ -67,7 +67,10 @@ def fetch_meeting_attendance_task(): @shared_task def resolve_meeting_materials_task( - *, meetings: list[str]=None, meetings_since: str=None, meetings_until: str=None + *, # only allow kw arguments + meetings: list[str] | None=None, + meetings_since: str | None=None, + meetings_until: str | None=None ): """Run materials resolver on meetings @@ -77,11 +80,14 @@ def resolve_meeting_materials_task( """ # IETF-1 = 1986-01-16 EARLIEST_MEETING_DATE = datetime.datetime(1986, 1, 1) + meetings_since_dt: datetime.datetime | None = None + meetings_until_dt: datetime.datetime | None = None + if meetings_since == "zero": - meetings_since = EARLIEST_MEETING_DATE + meetings_since_dt = EARLIEST_MEETING_DATE elif meetings_since is not None: try: - meetings_since = datetime.datetime.fromisoformat(meetings_since) + meetings_since_dt = datetime.datetime.fromisoformat(meetings_since) except ValueError: log.log( "Failed to parse meetings_since='{meetings_since}' with fromisoformat" @@ -90,37 +96,37 @@ def resolve_meeting_materials_task( if meetings_until is not None: try: - meetings_until = datetime.datetime.fromisoformat(meetings_until) + meetings_until_dt = datetime.datetime.fromisoformat(meetings_until) except ValueError: log.log( "Failed to parse meetings_until='{meetings_until}' with fromisoformat" ) raise - if meetings_since is None: + if meetings_since_dt is None: # if we only got meetings_until, start from the first meeting - meetings_since = EARLIEST_MEETING_DATE + meetings_since_dt = EARLIEST_MEETING_DATE if meetings is None: - if meetings_since is None: + if meetings_since_dt is None: log.log("No meetings requested, doing nothing.") return - meetings = Meeting.objects.filter(date__gte=meetings_since) - if meetings_until is not None: - meetings = meetings.filter(date__lte=meetings_until) + meetings_qs = Meeting.objects.filter(date__gte=meetings_since_dt) + if meetings_until_dt is not None: + meetings_qs = meetings_qs.filter(date__lte=meetings_until_dt) log.log( "Resolving materials for meetings " - f"between {meetings_since} and {meetings_until}" + f"between {meetings_since_dt} and {meetings_until_dt}" ) else: - log.log(f"Resolving materials for meetings since {meetings_since}") + log.log(f"Resolving materials for meetings since {meetings_since_dt}") else: - if meetings_since is not None: + if meetings_since_dt is not None: log.log( "Ignoring meetings_since and meetings_until " "because specific meetings were requested." ) - meetings = Meeting.objects.filter(number__in=meetings) - for meeting in meetings.order_by("date"): + meetings_qs = Meeting.objects.filter(number__in=meetings) + for meeting in meetings_qs.order_by("date"): log.log( f"Resolving materials for {meeting.type_id} " f"meeting {meeting.number} ({meeting.date})..." diff --git a/ietf/meeting/utils.py b/ietf/meeting/utils.py index 719c604e70..9f5e3d76f6 100644 --- a/ietf/meeting/utils.py +++ b/ietf/meeting/utils.py @@ -863,6 +863,10 @@ class BlobSpec: def resolve_one_material( doc: Document | DocHistory, rev: str | None, ext: str | None ) -> BlobSpec | None: + if doc.type_id is None: + log(f"Cannot resolve a doc with no type: {doc.name}") + return None + # Get the Document's base name. It may or may not have an extension. if rev is None: basename = Path(doc.get_base_name()) diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index 38696a51b9..6132540e3b 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -275,7 +275,7 @@ def _matches_meeting(doc, meeting=None): return doc.get_related_meeting() == meeting # try an exact match first - doc = Document.objects.filter(name=name).first() + doc: Document | DocHistory | None = Document.objects.filter(name=name).first() if doc is not None and _matches_meeting(doc, meeting): return doc, None From 5d2958aa878cfb98271d731b6bc9eb981c873c3a Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Wed, 22 Oct 2025 00:44:27 -0300 Subject: [PATCH 12/13] fix: don't use DocHistory to find materials (#9771) * fix: don't use DocHistory to validate revs The DocHistory records are incomplete and, in particular, -00 revs are often missing. * Revert "refactor: get actual rev in _get_materials_doc (#9741)" This reverts commit 7fd15801 * chore: remove the on-demand resolver api * chore: fix lint --- ietf/api/urls.py | 1 - ietf/meeting/utils.py | 10 ++---- ietf/meeting/views.py | 73 +++---------------------------------------- 3 files changed, 6 insertions(+), 78 deletions(-) diff --git a/ietf/api/urls.py b/ietf/api/urls.py index 6c9740f115..04575b34cb 100644 --- a/ietf/api/urls.py +++ b/ietf/api/urls.py @@ -50,7 +50,6 @@ # Let IESG members set positions programmatically url(r'^iesg/position', views_ballot.api_set_position), # Find the blob to store for a given materials document path - url(r'^meeting/(?:(?P(?:interim-)?[a-z0-9-]+)/)?materials/%(document)s(?P\.[A-Za-z0-9]+)?/resolve/$' % settings.URL_REGEXPS, meeting_views.api_resolve_materials_name), url(r'^meeting/(?:(?P(?:interim-)?[a-z0-9-]+)/)?materials/%(document)s(?P\.[A-Za-z0-9]+)?/resolve-cached/$' % settings.URL_REGEXPS, meeting_views.api_resolve_materials_name_cached), url(r'^meeting/blob/(?P[a-z0-9-]+)/(?P[a-z][a-z0-9.-]+)$', meeting_views.api_retrieve_materials_blob), # Let Meetecho set session video URLs diff --git a/ietf/meeting/utils.py b/ietf/meeting/utils.py index 9f5e3d76f6..f3623017f2 100644 --- a/ietf/meeting/utils.py +++ b/ietf/meeting/utils.py @@ -48,7 +48,6 @@ State, NewRevisionDocEvent, StateDocEvent, - DocHistory, StoredObject, ) from ietf.doc.models import DocEvent @@ -861,7 +860,7 @@ class BlobSpec: def resolve_one_material( - doc: Document | DocHistory, rev: str | None, ext: str | None + doc: Document, rev: str | None, ext: str | None ) -> BlobSpec | None: if doc.type_id is None: log(f"Cannot resolve a doc with no type: {doc.name}") @@ -995,12 +994,7 @@ def resolve_materials_for_one_meeting(meeting: Meeting): other_revisions = doc.revisions_by_newrevisionevent() other_revisions.remove(doc.rev) for rev in other_revisions: - old_doc = DocHistory.objects.filter( - doc=doc, rev=rev - ).order_by("-time").first() - if old_doc is None: - continue - blob = resolve_one_material(old_doc, rev=rev, ext=None) + blob = resolve_one_material(doc, rev=rev, ext=None) if blob is not None: resolved.append( ResolvedMaterial( diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index 6132540e3b..7e15659a3b 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -57,7 +57,7 @@ import debug # pyflakes:ignore from ietf.doc.fields import SearchableDocumentsField -from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent, DocHistory +from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent from ietf.doc.storage_utils import ( remove_from_storage, retrieve_bytes, @@ -98,7 +98,6 @@ organize_proceedings_sessions, resolve_uploaded_material, sort_accept_tuple, - resolve_one_material, ) from ietf.meeting.utils import add_event_info_to_session_qs from ietf.meeting.utils import session_time_for_sorting @@ -262,7 +261,7 @@ def current_materials(request): raise Http404('No such meeting') -def _get_materials_doc(name, meeting=None) -> tuple[Document | DocHistory, str | None]: +def _get_materials_doc(name, meeting=None): """Get meeting materials document named by name Raises Document.DoesNotExist if a match cannot be found. If meeting is None, @@ -275,7 +274,7 @@ def _matches_meeting(doc, meeting=None): return doc.get_related_meeting() == meeting # try an exact match first - doc: Document | DocHistory | None = Document.objects.filter(name=name).first() + doc = Document.objects.filter(name=name).first() if doc is not None and _matches_meeting(doc, meeting): return doc, None @@ -283,15 +282,7 @@ def _matches_meeting(doc, meeting=None): if "-" in name: docname, rev = name.rsplit("-", 1) if len(rev) == 2 and rev.isdigit(): - try: - # may raise Document.DoesNotExist - doc = Document.objects.get(name=docname, rev=rev) - except Document.DoesNotExist: - doc = DocHistory.objects.filter( - name=docname, rev=rev, - ).order_by("-time").first() - if doc is None: - raise + doc = Document.objects.get(name=docname) # may raise Document.DoesNotExist if ( _matches_meeting(doc, meeting) and rev in doc.revisions_by_newrevisionevent() @@ -371,62 +362,6 @@ def materials_document(request, document, num=None, ext=None): return HttpResponseRedirect(redirect_to=doc.get_href(meeting=meeting)) -@requires_api_token -def api_resolve_materials_name(request, document, num=None, ext=None): - """Resolve materials name into document to a blob spec - - Returns the bucket/name of a blob in the blob store that corresponds to the named - document. Handles resolution of revision if it is not specified and determines the - best extension if one is not provided. Response is JSON. - - As of 2025-10-10 we do not have blobs for all materials documents or for every - format of every document. This API still returns the bucket/name as if the blob - exists. Another API will allow the caller to obtain the file contents using that - name if it cannot be retrieved from the blob store. - """ - - def _error_response(status: int, detail: str): - return JsonResponse( - { - "status": status, - "title": "Error", - "detail": detail, - }, - status=status, - ) - - def _response(bucket: str, name: str): - return JsonResponse( - { - "bucket": bucket, - "name": name, - } - ) - - try: - meeting = get_meeting(num, type_in=["ietf", "interim"]) - except Http404 as err404: - return _error_response( - HTTP_404_NOT_FOUND, str(err404) - ) - - num = meeting.number - try: - doc, rev = _get_materials_doc(name=document, meeting=meeting) - except Document.DoesNotExist: - return _error_response( - HTTP_404_NOT_FOUND, f"No such document for meeting {num}" - ) - - resolved = resolve_one_material(doc, rev, ext) - if resolved is not None: - return _response(bucket=resolved.bucket, name=resolved.name) - - return _error_response( - HTTP_404_NOT_FOUND, f"No suitable file for {document} for meeting {num}" - ) - - @requires_api_token("ietf.meeting.views.api_resolve_materials_name") def api_resolve_materials_name_cached(request, document, num=None, ext=None): """Resolve materials name into document to a blob spec From f0431ec4e6638a101ec56ef537b63e18656fddb8 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 23 Oct 2025 13:19:32 -0300 Subject: [PATCH 13/13] feat: populate materials buckets (#9777) * refactor: drop .txt from filename_with_rev() * feat: utilities to populate materials blobs * feat: store materials for a full meeting as blobs Plus a bunch of fixup from working with real data. (Based on meetings 71, 83, and 118, picked arbitrarily) * chore: update migration * feat: task to store materials in blobdb * refactor: reimplement api_retrieve_materials_blob * fix: update resolving task, fix bugs * Revert "refactor: drop .txt from filename_with_rev()" This reverts commit a849d0f92d4df54296a7062b6c3a05fb0977be93. * chore: fix lint --- .../migrations/0002_resolvedmaterial.py | 2 +- ietf/meeting/tasks.py | 85 +++++++--- ietf/meeting/utils.py | 158 ++++++++++++++++-- ietf/meeting/views.py | 139 ++++----------- 4 files changed, 241 insertions(+), 143 deletions(-) diff --git a/ietf/blobdb/migrations/0002_resolvedmaterial.py b/ietf/blobdb/migrations/0002_resolvedmaterial.py index 2fcfdbca4c..e0ab405b11 100644 --- a/ietf/blobdb/migrations/0002_resolvedmaterial.py +++ b/ietf/blobdb/migrations/0002_resolvedmaterial.py @@ -15,7 +15,7 @@ class Migration(migrations.Migration): fields=[ ( "id", - models.AutoField( + models.BigAutoField( auto_created=True, primary_key=True, serialize=False, diff --git a/ietf/meeting/tasks.py b/ietf/meeting/tasks.py index f5186e6bc2..c361325f9a 100644 --- a/ietf/meeting/tasks.py +++ b/ietf/meeting/tasks.py @@ -1,15 +1,20 @@ -# Copyright The IETF Trust 2024, All Rights Reserved +# Copyright The IETF Trust 2024-2025, All Rights Reserved # # Celery task definitions # import datetime from celery import shared_task +# from django.db.models import QuerySet from django.utils import timezone from ietf.utils import log from .models import Meeting -from .utils import generate_proceedings_content, resolve_materials_for_one_meeting +from .utils import ( + generate_proceedings_content, + resolve_materials_for_one_meeting, + store_blobs_for_one_meeting, +) from .views import generate_agenda_data from .utils import fetch_attendance_from_meetings @@ -65,19 +70,12 @@ def fetch_meeting_attendance_task(): ) -@shared_task -def resolve_meeting_materials_task( - *, # only allow kw arguments - meetings: list[str] | None=None, - meetings_since: str | None=None, - meetings_until: str | None=None -): - """Run materials resolver on meetings - - Can request a set of meetings by number by passing a list in the meetings arg, or - by range by passing an iso-format timestamps in meetings_since / meetings_until. - To select all meetings, set meetings_since="zero" and omit other parameters. - """ +def _select_meetings( + meetings: list[str] | None = None, + meetings_since: str | None = None, + meetings_until: str | None = None +): # nyah + """Select meetings by number or date range""" # IETF-1 = 1986-01-16 EARLIEST_MEETING_DATE = datetime.datetime(1986, 1, 1) meetings_since_dt: datetime.datetime | None = None @@ -109,16 +107,16 @@ def resolve_meeting_materials_task( if meetings is None: if meetings_since_dt is None: log.log("No meetings requested, doing nothing.") - return + return Meeting.objects.none() meetings_qs = Meeting.objects.filter(date__gte=meetings_since_dt) if meetings_until_dt is not None: meetings_qs = meetings_qs.filter(date__lte=meetings_until_dt) log.log( - "Resolving materials for meetings " - f"between {meetings_since_dt} and {meetings_until_dt}" + "Selecting meetings between " + f"{meetings_since_dt} and {meetings_until_dt}" ) else: - log.log(f"Resolving materials for meetings since {meetings_since_dt}") + log.log(f"Selecting meetings since {meetings_since_dt}") else: if meetings_since_dt is not None: log.log( @@ -126,6 +124,23 @@ def resolve_meeting_materials_task( "because specific meetings were requested." ) meetings_qs = Meeting.objects.filter(number__in=meetings) + return meetings_qs + + +@shared_task +def resolve_meeting_materials_task( + *, # only allow kw arguments + meetings: list[str] | None=None, + meetings_since: str | None=None, + meetings_until: str | None=None +): + """Run materials resolver on meetings + + Can request a set of meetings by number by passing a list in the meetings arg, or + by range by passing an iso-format timestamps in meetings_since / meetings_until. + To select all meetings, set meetings_since="zero" and omit other parameters. + """ + meetings_qs = _select_meetings(meetings, meetings_since, meetings_until) for meeting in meetings_qs.order_by("date"): log.log( f"Resolving materials for {meeting.type_id} " @@ -141,3 +156,35 @@ def resolve_meeting_materials_task( ) else: log.log(f"Resolved in {(timezone.now() - mark).total_seconds():0.3f} seconds.") + + +@shared_task +def store_meeting_materials_as_blobs_task( + *, # only allow kw arguments + meetings: list[str] | None = None, + meetings_since: str | None = None, + meetings_until: str | None = None +): + """Push meeting materials into the blob store + + Can request a set of meetings by number by passing a list in the meetings arg, or + by range by passing an iso-format timestamps in meetings_since / meetings_until. + To select all meetings, set meetings_since="zero" and omit other parameters. + """ + meetings_qs = _select_meetings(meetings, meetings_since, meetings_until) + for meeting in meetings_qs.order_by("date"): + log.log( + f"Creating blobs for materials for {meeting.type_id} " + f"meeting {meeting.number} ({meeting.date})..." + ) + mark = timezone.now() + try: + store_blobs_for_one_meeting(meeting) + except Exception as err: + log.log( + "Exception raised while creating blobs for " + f"meeting {meeting.number}: {err}" + ) + else: + log.log( + f"Blobs created in {(timezone.now() - mark).total_seconds():0.3f} seconds.") diff --git a/ietf/meeting/utils.py b/ietf/meeting/utils.py index f3623017f2..053a0bb0fb 100644 --- a/ietf/meeting/utils.py +++ b/ietf/meeting/utils.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import datetime import itertools +from contextlib import suppress from dataclasses import dataclass import jsonschema @@ -29,7 +30,7 @@ import debug # pyflakes:ignore from ietf.dbtemplate.models import DBTemplate -from ietf.doc.storage_utils import store_bytes, store_str +from ietf.doc.storage_utils import store_bytes, store_str, AlreadyExistsError from ietf.meeting.models import ( Session, SchedulingEvent, @@ -55,6 +56,7 @@ from ietf.group.utils import can_manage_materials from ietf.name.models import SessionStatusName, ConstraintName, DocTypeName from ietf.person.models import Person +from ietf.utils import markdown from ietf.utils.html import clean_html from ietf.utils.log import log from ietf.utils.timezone import date_today @@ -872,16 +874,23 @@ def resolve_one_material( else: basename = Path(f"{doc.name}-{int(rev):02d}") + # If the document's file exists, the blob is _always_ named with this stem, + # even if it's different from the original. + blob_stem = Path(f"{doc.name}-{rev or doc.rev}") + # If we have an extension, either from the URL or the Document's base name, look up - # the blob or file or return 404. - if ext or basename.suffix != "": + # the blob or file or return 404. N.b. the suffix check needs adjustment to handle + # a bare "." extension when we reach py3.14. + if ext or basename.suffix != "": if ext: - basename = basename.with_suffix(ext) + blob_name = str(blob_stem.with_suffix(ext)) + else: + blob_name = str(blob_stem.with_suffix(basename.suffix)) # See if we have a stored object under that name preferred_blob = ( StoredObject.objects.exclude_deleted() - .filter(store=doc.type_id, name=basename) + .filter(store=doc.type_id, name=blob_name) .first() ) if preferred_blob is not None: @@ -890,11 +899,11 @@ def resolve_one_material( name=preferred_blob.name, ) # No stored object, fall back to the file system. - filename = Path(doc.get_file_path()) / basename - if filename.exists(): + filename = Path(doc.get_file_path()) / basename # use basename for file + if filename.is_file(): return BlobSpec( bucket=doc.type_id, - name=str(basename), + name=str(blob_stem.with_suffix(filename.suffix)), ) else: return None @@ -904,7 +913,7 @@ def resolve_one_material( StoredObject.objects.exclude_deleted() .filter( store=doc.type_id, - name__startswith=f"{basename.stem}.", # anchor to end with trailing "." + name__startswith=f"{blob_stem}.", # anchor to end with trailing "." ) .order_by("name") ) # orders by suffix @@ -918,7 +927,7 @@ def resolve_one_material( pdf_blob = blob_ext_choices[".pdf"] return BlobSpec( bucket=pdf_blob.store, - name=pdf_blob.name, + name=str(blob_stem.with_suffix(".pdf")), ) # Now look for files @@ -931,10 +940,9 @@ def resolve_one_material( # Short-circuit to return pdf if we have the file if ".pdf" in file_ext_choices: - pdf_filename = file_ext_choices[".pdf"] return BlobSpec( bucket=doc.type_id, - name=pdf_filename, + name=str(blob_stem.with_suffix(".pdf")), ) all_exts = set(blob_ext_choices.keys()).union(file_ext_choices.keys()) @@ -947,10 +955,9 @@ def resolve_one_material( name=preferred_blob.name, ) else: - preferred_filename = file_ext_choices[preferred_ext] return BlobSpec( bucket=doc.type_id, - name=preferred_filename, + name=str(blob_stem.with_suffix(preferred_ext)), ) return None @@ -1048,6 +1055,129 @@ def resolve_uploaded_material(meeting: Meeting, doc: Document): update_fields=["bucket", "blob"], ) + +def store_blob_for_one_material_file(doc: Document, rev: str, filepath: Path): + if not settings.ENABLE_BLOBSTORAGE: + raise RuntimeError("Cannot store blobs: ENABLE_BLOBSTORAGE is False") + + bucket = doc.type_id + if bucket not in settings.MATERIALS_TYPES_SERVED_BY_WORKER: + raise ValueError(f"Bucket {bucket} not found for doc {doc.name}.") + blob_stem = f"{doc.name}-{rev}" + suffix = filepath.suffix # includes leading "." + + # Store the file + try: + file_bytes = filepath.read_bytes() + except Exception as err: + log(f"Failed to read {filepath}: {err}") + raise + with suppress(AlreadyExistsError): + store_bytes( + kind=bucket, + name= blob_stem + suffix, + content=file_bytes, + mtime=datetime.datetime.fromtimestamp( + filepath.stat().st_mtime, + tz=datetime.UTC, + ), + allow_overwrite=False, + doc_name=doc.name, + doc_rev=rev, + ) + + # Special case: pre-render markdown into HTML as .md.html + if suffix == ".md": + try: + markdown_source = file_bytes.decode("utf-8") + except UnicodeDecodeError as err: + log(f"Unable to decode {filepath} as UTF-8, treating as latin-1: {err}") + markdown_source = file_bytes.decode("latin-1") + # render the markdown + try: + html = render_to_string( + "minimal.html", + { + "content": markdown.markdown(markdown_source), + "title": blob_stem, + "static_ietf_org": settings.STATIC_IETF_ORG, + }, + ) + except Exception as err: + log(f"Failed to render markdown for {filepath}: {err}") + else: + # Don't overwrite, but don't fail if the blob exists + with suppress(AlreadyExistsError): + store_str( + kind=bucket, + name=blob_stem + ".md.html", + content=html, + allow_overwrite=False, + doc_name=doc.name, + doc_rev=rev, + content_type="text/html;charset=utf-8", + ) + + +def store_blobs_for_one_material_doc(doc: Document): + """Ensure that all files related to a materials Document are in the blob store""" + if doc.type_id not in settings.MATERIALS_TYPES_SERVED_BY_WORKER: + log(f"This method does not handle docs of type {doc.name}") + return + + # Store files for current Document / rev + file_path = Path(doc.get_file_path()) + base_name = Path(doc.get_base_name()) + # .stem would remove directories, so use .with_suffix("") + base_name_stem = str(base_name.with_suffix("")) + if base_name_stem.endswith(".") and base_name.suffix == "": + # In Python 3.14, a trailing "." is a valid suffix, but in prior versions + # it is left as part of the stem. The suffix check ensures that either way, + # only a single "." will be removed. + base_name_stem = base_name_stem[:-1] + # Add any we find without the rev + for file_to_store in file_path.glob(base_name_stem + ".*"): + if not (file_to_store.is_file()): + continue + try: + store_blob_for_one_material_file(doc, doc.rev, file_to_store) + except Exception as err: + log( + f"Failed to store blob for {doc} rev {doc.rev} " + f"from {file_to_store}: {err}" + ) + + # Get other revisions + for rev in doc.revisions_by_newrevisionevent(): + if rev == doc.rev: + continue # already handled this + + # Add some that have the rev + for file_to_store in file_path.glob(doc.name + f"-{rev}.*"): + if not file_to_store.is_file(): + continue + try: + store_blob_for_one_material_file(doc, rev, file_to_store) + except Exception as err: + log( + f"Failed to store blob for {doc} rev {rev} " + f"from {file_to_store}: {err}" + ) + + +def store_blobs_for_one_meeting(meeting: Meeting): + meeting_documents = ( + Document.objects.filter( + type_id__in=settings.MATERIALS_TYPES_SERVED_BY_WORKER + ).filter( + Q(session__meeting=meeting) | Q(proceedingsmaterial__meeting=meeting) + ) + ).distinct() + + for doc in meeting_documents: + store_blobs_for_one_material_doc(doc) + + def create_recording(session, url, title=None, user=None): ''' Creates the Document type=recording, setting external_url and creating diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index 7e15659a3b..cf6fed596b 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -9,7 +9,6 @@ import json import math import os -from contextlib import suppress import pytz import re @@ -62,8 +61,6 @@ remove_from_storage, retrieve_bytes, store_file, - AlreadyExistsError, - store_str, store_bytes, ) from ietf.group.models import Group from ietf.group.utils import can_manage_session_materials, can_manage_some_groups, can_manage_group @@ -97,7 +94,7 @@ generate_proceedings_content, organize_proceedings_sessions, resolve_uploaded_material, - sort_accept_tuple, + sort_accept_tuple, store_blobs_for_one_material_doc, ) from ietf.meeting.utils import add_event_info_to_session_qs from ietf.meeting.utils import session_time_for_sorting @@ -451,118 +448,42 @@ def _default_content_type(blob_name: str): content_type=blob.content_type or _default_content_type(name), ) - # Did not find the blob. See if the filename is .md.html and, if so, see if we - # have the markdown. + # Did not find the blob. Create it if we can name_as_path = Path(name) if name_as_path.suffixes == [".md", ".html"]: - md_filename = name_as_path.stem - try: - md_file = storage.open(md_filename, "rb") - except FileNotFoundError: - pass - else: - md_src = md_file.read().decode("utf-8") - md_file.close() - # render the markdown - html = render_to_string( - "minimal.html", - { - "content": markdown.markdown(md_src), - "title": name_as_path.stem, - "static_ietf_org": settings.STATIC_IETF_ORG, - }, - ) - # Don't overwrite, but don't fail if the blob exists - with suppress(AlreadyExistsError): - store_str( - kind=bucket, - name=name, - content=html, - allow_overwrite=False, - # todo doc_name, - # todo doc_rev, - content_type="text/html;charset=utf-8", - ) - return HttpResponse(html) - # Didn't find .md as a blob, so check the filesystem. We do this here - # because we never write .md.html to the filesystem. - try: - # .stem.stem drops both extensions - doc, rev = _get_materials_doc(Path(md_filename).stem) - except Document.DoesNotExist: - pass - else: - if doc.type_id == bucket and doc.get_base_name() == md_filename: - filename = Path(doc.get_file_path()) / md_filename - md_bytes = filename.read_bytes() - # Don't overwrite, but don't fail if the blob exists - with suppress(AlreadyExistsError): - store_bytes( - kind=bucket, - name=md_filename, - content=md_bytes, - mtime=datetime.datetime.fromtimestamp( - filename.stat().st_mtime, - tz=datetime.UTC, - ), - allow_overwrite=False, - doc_name=doc.name, - doc_rev=doc.rev, - ) - # render the markdown - md_src = md_bytes.decode() - html = render_to_string( - "minimal.html", - { - "content": markdown.markdown(md_src), - "title": md_filename, - "static_ietf_org": settings.STATIC_IETF_ORG, - }, - ) - # Don't overwrite, but don't fail if the blob exists - with suppress(AlreadyExistsError): - store_str( - kind=bucket, - name=name, - content=html, - allow_overwrite=False, - doc_name=doc.name, - doc_rev=doc.rev, - content_type="text/html;charset=utf-8", - ) - return HttpResponse(html) - + # special case: .md.html means we want to create the .md and the .md.html + # will come along as a bonus + name_to_store = name_as_path.stem # removes the .html + else: + name_to_store = name + # See if we have a meeting-related document that matches the requested bucket and # name. try: - doc, rev = _get_materials_doc(Path(name).stem) + doc, rev = _get_materials_doc(Path(name_to_store).stem) + if doc.type_id != bucket: + raise Document.DoesNotExist except Document.DoesNotExist: - pass + return HttpResponseNotFound( + f"Document corresponding to {bucket}:{name} not found." + ) else: - if doc.type_id == bucket and doc.get_base_name() == name: - filename = Path(doc.get_file_path()) / name - with filename.open("rb") as f: - # Don't overwrite, but don't fail if the blob exists - with suppress(AlreadyExistsError): - store_file( - kind=bucket, - name=name, - file=f, - mtime=datetime.datetime.fromtimestamp( - filename.stat().st_mtime, - tz=datetime.UTC, - ), - allow_overwrite=False, - doc_name=doc.name, - doc_rev=doc.rev, - ) - return FileResponse( - filename.open("rb"), - filename=name, - content_type=_default_content_type(name), - ) - - return HttpResponseNotFound(f"Object {bucket}:{name} not found.") + # create all missing blobs for the doc while we're at it + store_blobs_for_one_material_doc(doc) + + # If we can make the blob at all, it now exists, so return it or a 404 + try: + blob = storage.open(name, "rb") + except FileNotFoundError: + return HttpResponseNotFound(f"Object {bucket}:{name} not found.") + else: + # found the blob - return it + assert isinstance(blob, BlobFile) + return FileResponse( + blob, + filename=name, + content_type=blob.content_type or _default_content_type(name), + ) @login_required