diff --git a/ietf/api/urls.py b/ietf/api/urls.py index 6f2efb3c1e..04575b34cb 100644 --- a/ietf/api/urls.py +++ b/ietf/api/urls.py @@ -49,6 +49,9 @@ url(r'^group/role-holder-addresses/$', api_views.role_holder_addresses), # Let IESG members set positions programmatically url(r'^iesg/position', views_ballot.api_set_position), + # Find the blob to store for a given materials document path + url(r'^meeting/(?:(?P<num>(?:interim-)?[a-z0-9-]+)/)?materials/%(document)s(?P<ext>\.[A-Za-z0-9]+)?/resolve-cached/$' % settings.URL_REGEXPS, meeting_views.api_resolve_materials_name_cached), + url(r'^meeting/blob/(?P<bucket>[a-z0-9-]+)/(?P<name>[a-z][a-z0-9.-]+)$', meeting_views.api_retrieve_materials_blob), # Let Meetecho set session video URLs url(r'^meeting/session/video/url$', meeting_views.api_set_session_video_url), # Let Meetecho tell us the name of its recordings diff --git a/ietf/blobdb/admin.py b/ietf/blobdb/admin.py index f4cd002e07..3e1a2a311f 100644 --- a/ietf/blobdb/admin.py +++ b/ietf/blobdb/admin.py @@ -3,7 +3,7 @@ from django.db.models.functions import Length from rangefilter.filters import DateRangeQuickSelectListFilterBuilder -from .models import Blob +from .models import Blob, ResolvedMaterial @admin.register(Blob) @@ -29,3 +29,12 @@ def get_queryset(self, request): def object_size(self, instance): """Get the size of the object""" return instance.object_size # annotation added in get_queryset() + + +@admin.register(ResolvedMaterial) +class ResolvedMaterialAdmin(admin.ModelAdmin): + model = ResolvedMaterial + list_display = ["name", "meeting_number", "bucket", "blob"] + list_filter = ["meeting_number", "bucket"] + search_fields = ["name", "blob"] + ordering = ["name"] diff --git a/ietf/blobdb/migrations/0002_resolvedmaterial.py b/ietf/blobdb/migrations/0002_resolvedmaterial.py new file mode 100644 index 0000000000..e0ab405b11 --- /dev/null +++ b/ietf/blobdb/migrations/0002_resolvedmaterial.py @@ -0,0 +1,48 @@ +# Copyright The IETF Trust 2025, All Rights Reserved + +from django.db 
import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("blobdb", "0001_initial"), + ] + + operations = [ + migrations.CreateModel( + name="ResolvedMaterial", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("name", models.CharField(help_text="Name to resolve", max_length=300)), + ( + "meeting_number", + models.CharField( + help_text="Meeting material is related to", max_length=64 + ), + ), + ( + "bucket", + models.CharField(help_text="Resolved bucket name", max_length=255), + ), + ( + "blob", + models.CharField(help_text="Resolved blob name", max_length=300), + ), + ], + ), + migrations.AddConstraint( + model_name="resolvedmaterial", + constraint=models.UniqueConstraint( + fields=("name", "meeting_number"), name="unique_name_per_meeting" + ), + ), + ] diff --git a/ietf/blobdb/models.py b/ietf/blobdb/models.py index 8f423d9f6c..fa7831f203 100644 --- a/ietf/blobdb/models.py +++ b/ietf/blobdb/models.py @@ -96,3 +96,23 @@ def _emit_blob_change_event(self, using=None): ), using=using, ) + + +class ResolvedMaterial(models.Model): + # A Document name can be 255 characters; allow this name to be a bit longer + name = models.CharField(max_length=300, help_text="Name to resolve") + meeting_number = models.CharField( + max_length=64, help_text="Meeting material is related to" + ) + bucket = models.CharField(max_length=255, help_text="Resolved bucket name") + blob = models.CharField(max_length=300, help_text="Resolved blob name") + + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["name", "meeting_number"], name="unique_name_per_meeting" + ) + ] + + def __str__(self): + return f"{self.name}@{self.meeting_number} -> {self.bucket}:{self.blob}" diff --git a/ietf/doc/models.py b/ietf/doc/models.py index 25ee734cbe..8bb79b64ed 100644 --- a/ietf/doc/models.py +++ b/ietf/doc/models.py @@ -913,6 +913,7 @@ def role_for_doc(self): 
roles.append('Action Holder') return ', '.join(roles) +# N.B., at least a couple dozen documents exist that do not satisfy this validator validate_docname = RegexValidator( r'^[-a-z0-9]+$', "Provide a valid document name consisting of lowercase letters, numbers and hyphens.", @@ -1588,9 +1589,17 @@ class BofreqResponsibleDocEvent(DocEvent): """ Capture the responsible leadership (IAB and IESG members) for a BOF Request """ responsible = models.ManyToManyField('person.Person', blank=True) + +class StoredObjectQuerySet(models.QuerySet): + def exclude_deleted(self): + return self.filter(deleted__isnull=True) + + class StoredObject(models.Model): """Hold metadata about objects placed in object storage""" + objects = StoredObjectQuerySet.as_manager() + store = models.CharField(max_length=256) name = models.CharField(max_length=1024, null=False, blank=False) # N.B. the 1024 limit on name comes from S3 sha384 = models.CharField(max_length=96) diff --git a/ietf/doc/storage.py b/ietf/doc/storage.py index a234ef2d4f..375620ccaf 100644 --- a/ietf/doc/storage.py +++ b/ietf/doc/storage.py @@ -32,7 +32,7 @@ def __init__(self, file, name, mtime=None, content_type="", store=None, doc_name @classmethod def from_storedobject(cls, file, name, store): """Alternate constructor for objects that already exist in the StoredObject table""" - stored_object = StoredObject.objects.filter(store=store, name=name, deleted__isnull=True).first() + stored_object = StoredObject.objects.exclude_deleted().filter(store=store, name=name).first() if stored_object is None: raise FileNotFoundError(f"StoredObject for {store}:{name} does not exist or was deleted") file = cls(file, name, store, doc_name=stored_object.doc_name, doc_rev=stored_object.doc_rev) @@ -140,7 +140,11 @@ def _save_stored_object(self, name, content) -> StoredObject: ), ), ) - if not created: + if not created and ( + record.sha384 != content.custom_metadata["sha384"] + or record.len != int(content.custom_metadata["len"]) + or 
record.deleted is not None + ): record.sha384 = content.custom_metadata["sha384"] record.len = int(content.custom_metadata["len"]) record.modified = now @@ -160,7 +164,7 @@ def _delete_stored_object(self, name) -> Optional[StoredObject]: else: now = timezone.now() # Note that existing_record is a queryset that will have one matching object - existing_record.filter(deleted__isnull=True).update(deleted=now) + existing_record.exclude_deleted().update(deleted=now) return existing_record.first() def _save(self, name, content): diff --git a/ietf/doc/storage_utils.py b/ietf/doc/storage_utils.py index 510c98c4f5..81588c83ec 100644 --- a/ietf/doc/storage_utils.py +++ b/ietf/doc/storage_utils.py @@ -12,6 +12,14 @@ from ietf.utils.log import log +class StorageUtilsError(Exception): + pass + + +class AlreadyExistsError(StorageUtilsError): + pass + + def _get_storage(kind: str) -> Storage: if kind in settings.ARTIFACT_STORAGE_NAMES: return storages[kind] @@ -70,7 +78,7 @@ def store_file( # debug.show('f"Asked to store {name} in {kind}: is_new={is_new}, allow_overwrite={allow_overwrite}"') if not allow_overwrite and not is_new: debug.show('f"Failed to save {kind}:{name} - name already exists in store"') - raise RuntimeError(f"Failed to save {kind}:{name} - name already exists in store") + raise AlreadyExistsError(f"Failed to save {kind}:{name} - name already exists in store") new_name = _get_storage(kind).save( name, StoredObjectFile( @@ -85,7 +93,7 @@ def store_file( if new_name != name: complaint = f"Error encountered saving '{name}' - results stored in '{new_name}' instead." 
debug.show("complaint") - raise RuntimeError(complaint) + raise StorageUtilsError(complaint) except Exception as err: log(f"Blobstore Error: Failed to store file {kind}:{name}: {repr(err)}") if settings.SERVER_MODE == "development": diff --git a/ietf/doc/views_material.py b/ietf/doc/views_material.py index 6f8b8a8f12..eefac0ca61 100644 --- a/ietf/doc/views_material.py +++ b/ietf/doc/views_material.py @@ -22,6 +22,7 @@ from ietf.doc.utils import add_state_change_event, check_common_doc_name_rules from ietf.group.models import Group from ietf.group.utils import can_manage_materials +from ietf.meeting.utils import resolve_uploaded_material from ietf.utils import log from ietf.utils.decorators import ignore_view_kwargs from ietf.utils.meetecho import MeetechoAPIError, SlidesManager @@ -179,6 +180,9 @@ def edit_material(request, name=None, acronym=None, action=None, doc_type=None): "There was an error creating a hardlink at %s pointing to %s: %s" % (ftp_filepath, filepath, ex) ) + else: + for meeting in set([s.meeting for s in doc.session_set.all()]): + resolve_uploaded_material(meeting=meeting, doc=doc) if prev_rev != doc.rev: e = NewRevisionDocEvent(type="new_revision", doc=doc, rev=doc.rev) diff --git a/ietf/meeting/resources.py b/ietf/meeting/resources.py index ede2b5b993..88562a88fe 100644 --- a/ietf/meeting/resources.py +++ b/ietf/meeting/resources.py @@ -11,11 +11,15 @@ from ietf import api -from ietf.meeting.models import ( Meeting, ResourceAssociation, Constraint, Room, Schedule, Session, - TimeSlot, SchedTimeSessAssignment, SessionPresentation, FloorPlan, - UrlResource, ImportantDate, SlideSubmission, SchedulingEvent, - BusinessConstraint, ProceedingsMaterial, MeetingHost, Attended, - Registration, RegistrationTicket) +from ietf.meeting.models import (Meeting, ResourceAssociation, Constraint, Room, + Schedule, Session, + TimeSlot, SchedTimeSessAssignment, SessionPresentation, + FloorPlan, + UrlResource, ImportantDate, SlideSubmission, + SchedulingEvent, + 
BusinessConstraint, ProceedingsMaterial, MeetingHost, + Attended, + Registration, RegistrationTicket) from ietf.name.resources import MeetingTypeNameResource class MeetingResource(ModelResource): diff --git a/ietf/meeting/tasks.py b/ietf/meeting/tasks.py index 784eb00d87..c361325f9a 100644 --- a/ietf/meeting/tasks.py +++ b/ietf/meeting/tasks.py @@ -1,13 +1,20 @@ -# Copyright The IETF Trust 2024, All Rights Reserved +# Copyright The IETF Trust 2024-2025, All Rights Reserved # # Celery task definitions # +import datetime + from celery import shared_task +# from django.db.models import QuerySet from django.utils import timezone from ietf.utils import log from .models import Meeting -from .utils import generate_proceedings_content +from .utils import ( + generate_proceedings_content, + resolve_materials_for_one_meeting, + store_blobs_for_one_meeting, +) from .views import generate_agenda_data from .utils import fetch_attendance_from_meetings @@ -61,3 +68,123 @@ def fetch_meeting_attendance_task(): meeting_stats['processed'] ) ) + + +def _select_meetings( + meetings: list[str] | None = None, + meetings_since: str | None = None, + meetings_until: str | None = None +): # nyah + """Select meetings by number or date range""" + # IETF-1 = 1986-01-16 + EARLIEST_MEETING_DATE = datetime.datetime(1986, 1, 1) + meetings_since_dt: datetime.datetime | None = None + meetings_until_dt: datetime.datetime | None = None + + if meetings_since == "zero": + meetings_since_dt = EARLIEST_MEETING_DATE + elif meetings_since is not None: + try: + meetings_since_dt = datetime.datetime.fromisoformat(meetings_since) + except ValueError: + log.log( + f"Failed to parse meetings_since='{meetings_since}' with fromisoformat" + ) + raise + + if meetings_until is not None: + try: + meetings_until_dt = datetime.datetime.fromisoformat(meetings_until) + except ValueError: + log.log( + f"Failed to parse meetings_until='{meetings_until}' with fromisoformat" + ) + raise + if meetings_since_dt is None: + # if we 
only got meetings_until, start from the first meeting + meetings_since_dt = EARLIEST_MEETING_DATE + + if meetings is None: + if meetings_since_dt is None: + log.log("No meetings requested, doing nothing.") + return Meeting.objects.none() + meetings_qs = Meeting.objects.filter(date__gte=meetings_since_dt) + if meetings_until_dt is not None: + meetings_qs = meetings_qs.filter(date__lte=meetings_until_dt) + log.log( + "Selecting meetings between " + f"{meetings_since_dt} and {meetings_until_dt}" + ) + else: + log.log(f"Selecting meetings since {meetings_since_dt}") + else: + if meetings_since_dt is not None: + log.log( + "Ignoring meetings_since and meetings_until " + "because specific meetings were requested." + ) + meetings_qs = Meeting.objects.filter(number__in=meetings) + return meetings_qs + + +@shared_task +def resolve_meeting_materials_task( + *, # only allow kw arguments + meetings: list[str] | None=None, + meetings_since: str | None=None, + meetings_until: str | None=None +): + """Run materials resolver on meetings + + Can request a set of meetings by number by passing a list in the meetings arg, or + by range by passing an iso-format timestamps in meetings_since / meetings_until. + To select all meetings, set meetings_since="zero" and omit other parameters. + """ + meetings_qs = _select_meetings(meetings, meetings_since, meetings_until) + for meeting in meetings_qs.order_by("date"): + log.log( + f"Resolving materials for {meeting.type_id} " + f"meeting {meeting.number} ({meeting.date})..." 
+ ) + mark = timezone.now() + try: + resolve_materials_for_one_meeting(meeting) + except Exception as err: + log.log( + "Exception raised while resolving materials for " + f"meeting {meeting.number}: {err}" + ) + else: + log.log(f"Resolved in {(timezone.now() - mark).total_seconds():0.3f} seconds.") + + +@shared_task +def store_meeting_materials_as_blobs_task( + *, # only allow kw arguments + meetings: list[str] | None = None, + meetings_since: str | None = None, + meetings_until: str | None = None +): + """Push meeting materials into the blob store + + Can request a set of meetings by number by passing a list in the meetings arg, or + by range by passing an iso-format timestamps in meetings_since / meetings_until. + To select all meetings, set meetings_since="zero" and omit other parameters. + """ + meetings_qs = _select_meetings(meetings, meetings_since, meetings_until) + for meeting in meetings_qs.order_by("date"): + log.log( + f"Creating blobs for materials for {meeting.type_id} " + f"meeting {meeting.number} ({meeting.date})..." 
+ ) + mark = timezone.now() + try: + store_blobs_for_one_meeting(meeting) + except Exception as err: + log.log( + "Exception raised while creating blobs for " + f"meeting {meeting.number}: {err}" + ) + else: + log.log( + f"Blobs created in {(timezone.now() - mark).total_seconds():0.3f} seconds.") diff --git a/ietf/meeting/utils.py b/ietf/meeting/utils.py index feadb0c7fd..053a0bb0fb 100644 --- a/ietf/meeting/utils.py +++ b/ietf/meeting/utils.py @@ -2,6 +2,9 @@ # -*- coding: utf-8 -*- import datetime import itertools +from contextlib import suppress +from dataclasses import dataclass + import jsonschema import os import requests @@ -27,16 +30,33 @@ import debug # pyflakes:ignore from ietf.dbtemplate.models import DBTemplate -from ietf.doc.storage_utils import store_bytes, store_str -from ietf.meeting.models import (Session, SchedulingEvent, TimeSlot, - Constraint, SchedTimeSessAssignment, SessionPresentation, Attended, - Registration, Meeting, RegistrationTicket) -from ietf.doc.models import Document, State, NewRevisionDocEvent, StateDocEvent +from ietf.doc.storage_utils import store_bytes, store_str, AlreadyExistsError +from ietf.meeting.models import ( + Session, + SchedulingEvent, + TimeSlot, + Constraint, + SchedTimeSessAssignment, + SessionPresentation, + Attended, + Registration, + Meeting, + RegistrationTicket, +) +from ietf.blobdb.models import ResolvedMaterial +from ietf.doc.models import ( + Document, + State, + NewRevisionDocEvent, + StateDocEvent, + StoredObject, +) from ietf.doc.models import DocEvent from ietf.group.models import Group from ietf.group.utils import can_manage_materials from ietf.name.models import SessionStatusName, ConstraintName, DocTypeName from ietf.person.models import Person +from ietf.utils import markdown from ietf.utils.html import clean_html from ietf.utils.log import log from ietf.utils.timezone import date_today @@ -221,6 +241,7 @@ def save_bluesheet(request, session, file, encoding='utf-8'): save_error = 
handle_upload_file(file, filename, session.meeting, 'bluesheets', request=request, encoding=encoding) if not save_error: doc.save_with_history([e]) + resolve_uploaded_material(meeting=session.meeting, doc=doc) return save_error @@ -833,6 +854,330 @@ def write_doc_for_session(session, type_id, filename, contents): store_str(type_id, filename.name, contents) return None + +@dataclass +class BlobSpec: + bucket: str + name: str + + +def resolve_one_material( + doc: Document, rev: str | None, ext: str | None +) -> BlobSpec | None: + if doc.type_id is None: + log(f"Cannot resolve a doc with no type: {doc.name}") + return None + + # Get the Document's base name. It may or may not have an extension. + if rev is None: + basename = Path(doc.get_base_name()) + else: + basename = Path(f"{doc.name}-{int(rev):02d}") + + # If the document's file exists, the blob is _always_ named with this stem, + # even if it's different from the original. + blob_stem = Path(f"{doc.name}-{rev or doc.rev}") + + # If we have an extension, either from the URL or the Document's base name, look up + # the blob or file or return 404. N.b. the suffix check needs adjustment to handle + # a bare "." extension when we reach py3.14. + if ext or basename.suffix != "": + if ext: + blob_name = str(blob_stem.with_suffix(ext)) + else: + blob_name = str(blob_stem.with_suffix(basename.suffix)) + + # See if we have a stored object under that name + preferred_blob = ( + StoredObject.objects.exclude_deleted() + .filter(store=doc.type_id, name=blob_name) + .first() + ) + if preferred_blob is not None: + return BlobSpec( + bucket=preferred_blob.store, + name=preferred_blob.name, + ) + # No stored object, fall back to the file system. + filename = Path(doc.get_file_path()) / basename # use basename for file + if filename.is_file(): + return BlobSpec( + bucket=doc.type_id, + name=str(blob_stem.with_suffix(filename.suffix)), + ) + else: + return None + + # No extension has been specified so far, so look one up. 
+ matching_stored_objects = ( + StoredObject.objects.exclude_deleted() + .filter( + store=doc.type_id, + name__startswith=f"{blob_stem}.", # anchor to end with trailing "." + ) + .order_by("name") + ) # orders by suffix + blob_ext_choices = { + Path(stored_obj.name).suffix: stored_obj + for stored_obj in matching_stored_objects + } + + # Short-circuit to return pdf if present + if ".pdf" in blob_ext_choices: + pdf_blob = blob_ext_choices[".pdf"] + return BlobSpec( + bucket=pdf_blob.store, + name=str(blob_stem.with_suffix(".pdf")), + ) + + # Now look for files + filename = Path(doc.get_file_path()) / basename + file_ext_choices = { + # Construct a map from suffix to full filename + fn.suffix: fn.name + for fn in sorted(filename.parent.glob(filename.stem + ".*")) + } + + # Short-circuit to return pdf if we have the file + if ".pdf" in file_ext_choices: + return BlobSpec( + bucket=doc.type_id, + name=str(blob_stem.with_suffix(".pdf")), + ) + + all_exts = set(blob_ext_choices.keys()).union(file_ext_choices.keys()) + if len(all_exts) > 0: + preferred_ext = sorted(all_exts)[0] + if preferred_ext in blob_ext_choices: + preferred_blob = blob_ext_choices[preferred_ext] + return BlobSpec( + bucket=preferred_blob.store, + name=preferred_blob.name, + ) + else: + return BlobSpec( + bucket=doc.type_id, + name=str(blob_stem.with_suffix(preferred_ext)), + ) + + return None + + +def resolve_materials_for_one_meeting(meeting: Meeting): + start_time = timezone.now() + meeting_documents = ( + Document.objects.filter( + type_id__in=settings.MATERIALS_TYPES_SERVED_BY_WORKER + ).filter( + Q(session__meeting=meeting) | Q(proceedingsmaterial__meeting=meeting) + ) + ).distinct() + + resolved = [] + for doc in meeting_documents: + # request by doc name with no rev + blob = resolve_one_material(doc, rev=None, ext=None) + if blob is not None: + resolved.append( + ResolvedMaterial( + name=doc.name, + meeting_number=meeting.number, + bucket=blob.bucket, + blob=blob.name, + ) + ) + # request by 
doc name + rev + blob = resolve_one_material(doc, rev=doc.rev, ext=None) + if blob is not None: + resolved.append( + ResolvedMaterial( + name=f"{doc.name}-{doc.rev:02}", + meeting_number=meeting.number, + bucket=blob.bucket, + blob=blob.name, + ) + ) + # for other revisions, only need request by doc name + rev + other_revisions = doc.revisions_by_newrevisionevent() + other_revisions.remove(doc.rev) + for rev in other_revisions: + blob = resolve_one_material(doc, rev=rev, ext=None) + if blob is not None: + resolved.append( + ResolvedMaterial( + name=f"{doc.name}-{rev:02}", + meeting_number=meeting.number, + bucket=blob.bucket, + blob=blob.name, + ) + ) + ResolvedMaterial.objects.bulk_create( + resolved, + update_conflicts=True, + unique_fields=["name", "meeting_number"], + update_fields=["bucket", "blob"], + ) + # Warn if any files were updated during the above process + last_update = meeting_documents.aggregate(Max("time"))["time__max"] + if last_update and last_update > start_time: + log( + f"Warning: materials for meeting {meeting.number} " + "changed during ResolvedMaterial update" + ) + +def resolve_uploaded_material(meeting: Meeting, doc: Document): + resolved = [] + blob = resolve_one_material(doc, rev=None, ext=None) + if blob is not None: + resolved.append( + ResolvedMaterial( + name=doc.name, + meeting_number=meeting.number, + bucket=blob.bucket, + blob=blob.name, + ) + ) + # request by doc name + rev + blob = resolve_one_material(doc, rev=doc.rev, ext=None) + if blob is not None: + resolved.append( + ResolvedMaterial( + name=f"{doc.name}-{doc.rev:02}", + meeting_number=meeting.number, + bucket=blob.bucket, + blob=blob.name, + ) + ) + ResolvedMaterial.objects.bulk_create( + resolved, + update_conflicts=True, + unique_fields=["name", "meeting_number"], + update_fields=["bucket", "blob"], + ) + + +def store_blob_for_one_material_file(doc: Document, rev: str, filepath: Path): + if not settings.ENABLE_BLOBSTORAGE: + raise RuntimeError("Cannot store blobs: 
ENABLE_BLOBSTORAGE is False") + + bucket = doc.type_id + if bucket not in settings.MATERIALS_TYPES_SERVED_BY_WORKER: + raise ValueError(f"Bucket {bucket} not found for doc {doc.name}.") + blob_stem = f"{doc.name}-{rev}" + suffix = filepath.suffix # includes leading "." + + # Store the file + try: + file_bytes = filepath.read_bytes() + except Exception as err: + log(f"Failed to read {filepath}: {err}") + raise + with suppress(AlreadyExistsError): + store_bytes( + kind=bucket, + name= blob_stem + suffix, + content=file_bytes, + mtime=datetime.datetime.fromtimestamp( + filepath.stat().st_mtime, + tz=datetime.UTC, + ), + allow_overwrite=False, + doc_name=doc.name, + doc_rev=rev, + ) + + # Special case: pre-render markdown into HTML as .md.html + if suffix == ".md": + try: + markdown_source = file_bytes.decode("utf-8") + except UnicodeDecodeError as err: + log(f"Unable to decode {filepath} as UTF-8, treating as latin-1: {err}") + markdown_source = file_bytes.decode("latin-1") + # render the markdown + try: + html = render_to_string( + "minimal.html", + { + "content": markdown.markdown(markdown_source), + "title": blob_stem, + "static_ietf_org": settings.STATIC_IETF_ORG, + }, + ) + except Exception as err: + log(f"Failed to render markdown for {filepath}: {err}") + else: + # Don't overwrite, but don't fail if the blob exists + with suppress(AlreadyExistsError): + store_str( + kind=bucket, + name=blob_stem + ".md.html", + content=html, + allow_overwrite=False, + doc_name=doc.name, + doc_rev=rev, + content_type="text/html;charset=utf-8", + ) + + +def store_blobs_for_one_material_doc(doc: Document): + """Ensure that all files related to a materials Document are in the blob store""" + if doc.type_id not in settings.MATERIALS_TYPES_SERVED_BY_WORKER: + log(f"This method does not handle docs of type {doc.type_id} ({doc.name})") + return + + # Store files for current Document / rev + file_path = Path(doc.get_file_path()) + base_name = Path(doc.get_base_name()) + # .stem would remove 
directories, so use .with_suffix("") + base_name_stem = str(base_name.with_suffix("")) + if base_name_stem.endswith(".") and base_name.suffix == "": + # In Python 3.14, a trailing "." is a valid suffix, but in prior versions + # it is left as part of the stem. The suffix check ensures that either way, + # only a single "." will be removed. + base_name_stem = base_name_stem[:-1] + # Add any we find without the rev + for file_to_store in file_path.glob(base_name_stem + ".*"): + if not (file_to_store.is_file()): + continue + try: + store_blob_for_one_material_file(doc, doc.rev, file_to_store) + except Exception as err: + log( + f"Failed to store blob for {doc} rev {doc.rev} " + f"from {file_to_store}: {err}" + ) + + # Get other revisions + for rev in doc.revisions_by_newrevisionevent(): + if rev == doc.rev: + continue # already handled this + + # Add some that have the rev + for file_to_store in file_path.glob(doc.name + f"-{rev}.*"): + if not file_to_store.is_file(): + continue + try: + store_blob_for_one_material_file(doc, rev, file_to_store) + except Exception as err: + log( + f"Failed to store blob for {doc} rev {rev} " + f"from {file_to_store}: {err}" + ) + + +def store_blobs_for_one_meeting(meeting: Meeting): + meeting_documents = ( + Document.objects.filter( + type_id__in=settings.MATERIALS_TYPES_SERVED_BY_WORKER + ).filter( + Q(session__meeting=meeting) | Q(proceedingsmaterial__meeting=meeting) + ) + ).distinct() + + for doc in meeting_documents: + store_blobs_for_one_material_doc(doc) + + def create_recording(session, url, title=None, user=None): ''' Creates the Document type=recording, setting external_url and creating diff --git a/ietf/meeting/views.py b/ietf/meeting/views.py index fcc9312609..cf6fed596b 100644 --- a/ietf/meeting/views.py +++ b/ietf/meeting/views.py @@ -9,6 +9,7 @@ import json import math import os + import pytz import re import tarfile @@ -27,10 +28,12 @@ from django import forms from django.core.cache import caches +from 
django.core.files.storage import storages from django.shortcuts import render, redirect, get_object_or_404 from django.http import (HttpResponse, HttpResponseRedirect, HttpResponseForbidden, HttpResponseNotFound, Http404, HttpResponseBadRequest, - JsonResponse, HttpResponseGone, HttpResponseNotAllowed) + JsonResponse, HttpResponseGone, HttpResponseNotAllowed, + FileResponse) from django.conf import settings from django.contrib import messages from django.contrib.auth.decorators import login_required @@ -48,18 +51,25 @@ from django.views.decorators.cache import cache_page from django.views.decorators.csrf import ensure_csrf_cookie, csrf_exempt from django.views.generic import RedirectView +from rest_framework.status import HTTP_404_NOT_FOUND import debug # pyflakes:ignore from ietf.doc.fields import SearchableDocumentsField from ietf.doc.models import Document, State, DocEvent, NewRevisionDocEvent -from ietf.doc.storage_utils import remove_from_storage, retrieve_bytes, store_file +from ietf.doc.storage_utils import ( + remove_from_storage, + retrieve_bytes, + store_file, +) from ietf.group.models import Group from ietf.group.utils import can_manage_session_materials, can_manage_some_groups, can_manage_group from ietf.person.models import Person, User from ietf.ietfauth.utils import role_required, has_role, user_is_person from ietf.mailtrigger.utils import gather_address_lists -from ietf.meeting.models import Meeting, Session, Schedule, FloorPlan, SessionPresentation, TimeSlot, SlideSubmission, Attended +from ietf.meeting.models import Meeting, Session, Schedule, FloorPlan, \ + SessionPresentation, TimeSlot, SlideSubmission, Attended +from ..blobdb.models import ResolvedMaterial from ietf.meeting.models import ImportantDate, SessionStatusName, SchedulingEvent, SchedTimeSessAssignment, Room, TimeSlotTypeName from ietf.meeting.models import Registration from ietf.meeting.forms import ( CustomDurationField, SwapDaysForm, SwapTimeslotsForm, ImportMinutesForm, @@ -83,7 
+93,8 @@ finalize, generate_proceedings_content, organize_proceedings_sessions, - sort_accept_tuple, + resolve_uploaded_material, + sort_accept_tuple, store_blobs_for_one_material_doc, ) from ietf.meeting.utils import add_event_info_to_session_qs from ietf.meeting.utils import session_time_for_sorting @@ -120,6 +131,8 @@ from icalendar import Calendar, Event from ietf.doc.templatetags.ietf_filters import absurl +from ..api.ietf_utils import requires_api_token +from ..blobdb.storage import BlobdbStorage, BlobFile request_summary_exclude_group_types = ['team'] @@ -245,21 +258,32 @@ def current_materials(request): raise Http404('No such meeting') -def _get_materials_doc(meeting, name): +def _get_materials_doc(name, meeting=None): """Get meeting materials document named by name - Raises Document.DoesNotExist if a match cannot be found. + Raises Document.DoesNotExist if a match cannot be found. If meeting is None, + matches a name that is associated with _any_ meeting. """ + + def _matches_meeting(doc, meeting=None): + if meeting is None: + return doc.get_related_meeting() is not None + return doc.get_related_meeting() == meeting + # try an exact match first doc = Document.objects.filter(name=name).first() - if doc is not None and doc.get_related_meeting() == meeting: + if doc is not None and _matches_meeting(doc, meeting): return doc, None + # try parsing a rev number if "-" in name: docname, rev = name.rsplit("-", 1) if len(rev) == 2 and rev.isdigit(): doc = Document.objects.get(name=docname) # may raise Document.DoesNotExist - if doc.get_related_meeting() == meeting and rev in doc.revisions_by_newrevisionevent(): + if ( + _matches_meeting(doc, meeting) + and rev in doc.revisions_by_newrevisionevent() + ): return doc, rev # give up raise Document.DoesNotExist @@ -277,7 +301,7 @@ def materials_document(request, document, num=None, ext=None): meeting = get_meeting(num, type_in=["ietf", "interim"]) num = meeting.number try: - doc, rev = 
@requires_api_token("ietf.meeting.views.api_resolve_materials_name")
def api_resolve_materials_name_cached(request, document, num=None, ext=None):
    """Resolve a materials document name to a blob spec

    Returns the bucket/name of a blob in the blob store that corresponds to the named
    document. The answer is read from the ResolvedMaterial table, keyed on
    (meeting_number=num, name=document); this view does no resolution work itself.
    Response is JSON.

    On success the response body is {"bucket": ..., "name": ...}. If there is no
    ResolvedMaterial row for the requested name / meeting, a 404 is returned with
    a JSON body containing "status", "title", and "detail".

    The `ext` argument is accepted but is not used by the lookup.

    As of 2025-10-10 we do not have blobs for all materials documents or for every
    format of every document. This API still returns the bucket/name as if the blob
    exists. Another API will allow the caller to obtain the file contents using that
    name if it cannot be retrieved from the blob store.
    """
    # NOTE(review): the API token endpoint named in the decorator is
    # "api_resolve_materials_name" (no "_cached" suffix) - presumably intentional
    # so both views share one token; confirm.

    def _error_response(status: int, detail: str):
        # Error payload; the HTTP status is repeated in the JSON body
        return JsonResponse(
            {
                "status": status,
                "title": "Error",
                "detail": detail,
            },
            status=status,
        )

    def _response(bucket: str, name: str):
        return JsonResponse(
            {
                "bucket": bucket,
                "name": name,
            }
        )

    try:
        resolved = ResolvedMaterial.objects.get(
            meeting_number=num, name=document
        )
    except ResolvedMaterial.DoesNotExist:
        return _error_response(
            HTTP_404_NOT_FOUND, f"No suitable file for {document} for meeting {num}"
        )
    return _response(bucket=resolved.bucket, name=resolved.blob)


@requires_api_token
def api_retrieve_materials_blob(request, bucket, name):
    """Retrieve contents of a meeting materials blob

    This is intended as a fallback if the web worker cannot retrieve a blob from
    the blobstore itself. The most likely cause is retrieving an old materials document
    that has not been backfilled.

    If a blob is requested that does not exist, this checks for it on the filesystem
    and if found, adds it to the blobstore, creates a StoredObject record, and returns
    the contents as it would have done if the blob was already present.

    As a special case, if a requested file with extension `.md.html` does not exist
    but a file with the same name but extension `.md` does, the `.md` file will be
    rendered from markdown to html and returned / stored.

    Returns a FileResponse with the blob contents, or a 404 if blob storage is
    disabled, the bucket is not in settings.MATERIALS_TYPES_SERVED_BY_WORKER, no
    matching document of type `bucket` exists, or the blob still does not exist
    after attempting to create it.
    """
    DEFAULT_CONTENT_TYPES = {
        ".html": "text/html;charset=utf-8",
        ".md": "text/markdown;charset=utf-8",
        ".pdf": "application/pdf",
        ".txt": "text/plain;charset=utf-8",
    }

    def _default_content_type(blob_name: str) -> str:
        # Guess from the final suffix of the name that was passed in (not the
        # enclosing `name`), falling back to a generic binary type
        return DEFAULT_CONTENT_TYPES.get(
            Path(blob_name).suffix, "application/octet-stream"
        )

    def _blob_response(blob):
        # Wrap an opened blob in a response, preferring its recorded content type
        assert isinstance(blob, BlobFile)
        return FileResponse(
            blob,
            filename=name,
            content_type=blob.content_type or _default_content_type(name),
        )

    if not (
        settings.ENABLE_BLOBSTORAGE
        and bucket in settings.MATERIALS_TYPES_SERVED_BY_WORKER
    ):
        return HttpResponseNotFound(f"Bucket {bucket} not found.")
    storage = storages[bucket]  # if not configured, a server error will result
    assert isinstance(storage, BlobdbStorage)
    try:
        blob = storage.open(name, "rb")
    except FileNotFoundError:
        pass  # not an error yet - fall through and try to create the blob
    else:
        # found the blob - return it
        return _blob_response(blob)

    # Did not find the blob. Create it if we can
    name_as_path = Path(name)
    if name_as_path.suffixes == [".md", ".html"]:
        # special case: .md.html means we want to create the .md and the .md.html
        # will come along as a bonus
        name_to_store = name_as_path.stem  # removes the .html
    else:
        name_to_store = name

    # See if we have a meeting-related document that matches the requested bucket and
    # name.
    try:
        doc, rev = _get_materials_doc(Path(name_to_store).stem)
        if doc.type_id != bucket:
            # a doc of another type is treated the same as no doc at all
            raise Document.DoesNotExist
    except Document.DoesNotExist:
        return HttpResponseNotFound(
            f"Document corresponding to {bucket}:{name} not found."
        )
    else:
        # create all missing blobs for the doc while we're at it
        store_blobs_for_one_material_doc(doc)

    # If we can make the blob at all, it now exists, so return it or a 404
    try:
        blob = storage.open(name, "rb")
    except FileNotFoundError:
        return HttpResponseNotFound(f"Object {bucket}:{name} not found.")
    else:
        # found the blob - return it
        return _blob_response(blob)
+ resolve_uploaded_material(meeting=session.meeting, doc=session.narrative_minutes()) messages.success(request, f'Successfully uploaded narrative minutes as revision {session.narrative_minutes().rev}.') return redirect('ietf.meeting.views.session_details', num=num, acronym=session.group.acronym) else: @@ -3154,6 +3308,7 @@ def upload_session_agenda(request, session_id, num): form.add_error(None, save_error) else: doc.save_with_history([e]) + resolve_uploaded_material(meeting=session.meeting, doc=doc) messages.success(request, f'Successfully uploaded agenda as revision {doc.rev}.') return redirect('ietf.meeting.views.session_details',num=num,acronym=session.group.acronym) else: @@ -3337,6 +3492,7 @@ def upload_session_slides(request, session_id, num, name=None): else: doc.save_with_history([e]) post_process(doc) + resolve_uploaded_material(meeting=session.meeting, doc=doc) # Send MeetEcho updates even if we had a problem saving - that will keep it in sync with the # SessionPresentation, which was already saved regardless of problems saving the file. 
@@ -4737,6 +4893,7 @@ def err(code, text): write_doc_for_session(session, 'chatlog', filename, json.dumps(apidata['chatlog'])) e = NewRevisionDocEvent.objects.create(doc=doc, rev=doc.rev, by=request.user.person, type='new_revision', desc='New revision available: %s'%doc.rev) doc.save_with_history([e]) + resolve_uploaded_material(meeting=session.meeting, doc=doc) return HttpResponse( "Done", status=200, @@ -4785,6 +4942,7 @@ def err(code, text): write_doc_for_session(session, 'polls', filename, json.dumps(apidata['polls'])) e = NewRevisionDocEvent.objects.create(doc=doc, rev=doc.rev, by=request.user.person, type='new_revision', desc='New revision available: %s'%doc.rev) doc.save_with_history([e]) + resolve_uploaded_material(meeting=session.meeting, doc=doc) return HttpResponse( "Done", status=200, @@ -5167,6 +5325,7 @@ def approve_proposed_slides(request, slidesubmission_id, num): doc.store_bytes(target_filename, retrieve_bytes("staging", submission.filename)) remove_from_storage("staging", submission.filename) post_process(doc) + resolve_uploaded_material(meeting=submission.session.meeting, doc=doc) DocEvent.objects.create(type="approved_slides", doc=doc, rev=doc.rev, by=request.user.person, desc="Slides approved") # update meetecho slide info if configured diff --git a/ietf/settings.py b/ietf/settings.py index 9a213c1a73..450e2dc7fa 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -786,29 +786,29 @@ def skip_unreadable_post(record): # Storages for artifacts stored as blobs ARTIFACT_STORAGE_NAMES: list[str] = [ - "bofreq", - "charter", - "conflrev", "active-draft", - "draft", - "slides", - "minutes", "agenda", + "bibxml-ids", "bluesheets", - "procmaterials", - "narrativeminutes", - "statement", - "statchg", - "liai-att", + "bofreq", + "charter", "chatlog", - "polls", - "staging", - "bibxml-ids", - "indexes", + "conflrev", + "draft", "floorplan", + "indexes", + "liai-att", "meetinghostlogo", + "minutes", + "narrativeminutes", "photo", + "polls", + 
"procmaterials", "review", + "slides", + "staging", + "statchg", + "statement", ] for storagename in ARTIFACT_STORAGE_NAMES: STORAGES[storagename] = { @@ -816,6 +816,20 @@ def skip_unreadable_post(record): "OPTIONS": {"bucket_name": storagename}, } +# Buckets / doc types of meeting materials the CF worker is allowed to serve. This +# differs from the list in Session.meeting_related() by the omission of "recording" +MATERIALS_TYPES_SERVED_BY_WORKER = [ + "agenda", + "bluesheets", + "chatlog", + "minutes", + "narrativeminutes", + "polls", + "procmaterials", + "slides", +] + + # Override this in settings_local.py if needed # *_PATH variables ends with a slash/ . diff --git a/ietf/templates/minimal.html b/ietf/templates/minimal.html index 87f661f501..15c432505e 100644 --- a/ietf/templates/minimal.html +++ b/ietf/templates/minimal.html @@ -9,8 +9,8 @@ {{ title }} - - + + {# load this in the head, to prevent flickering #}