From 3136f32689b6861c7fc96389faab01216e3ee959 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Wed, 4 May 2022 15:45:53 -0300 Subject: [PATCH 01/99] ci: add Dockerfile and action to build celery worker image --- .github/workflows/build-celery-worker.yml | 35 +++++++++ dev/celery/Dockerfile | 88 +++++++++++++++++++++++ requirements.txt | 1 + 3 files changed, 124 insertions(+) create mode 100644 .github/workflows/build-celery-worker.yml create mode 100644 dev/celery/Dockerfile diff --git a/.github/workflows/build-celery-worker.yml b/.github/workflows/build-celery-worker.yml new file mode 100644 index 0000000000..b8c5c44efd --- /dev/null +++ b/.github/workflows/build-celery-worker.yml @@ -0,0 +1,35 @@ +name: Build Celery Worker Docker Image + +on: + push: + branches: + - 'main' + paths: + - 'requirements.txt' + - 'dev/celery/Dockerfile' + + workflow_dispatch: + +env: + REGISTRY: ghcr.io + IMAGE_NAME: datatracker-celery + +jobs: + publish: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + + steps: + - uses: actions/checkout@v2 + + - name: Docker Build & Push Action + uses: mr-smithers-excellent/docker-build-push@v5.6 + with: + image: ${{ env.IMAGE_NAME }} + tags: latest + registry: ${{ env.REGISTRY }} + dockerfile: dev/celery/Dockerfile + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} diff --git a/dev/celery/Dockerfile b/dev/celery/Dockerfile new file mode 100644 index 0000000000..045e5668bb --- /dev/null +++ b/dev/celery/Dockerfile @@ -0,0 +1,88 @@ +FROM python:3.9-bullseye +LABEL maintainer="IETF Tools Team " + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update + +# Install the packages we need +RUN apt-get update --fix-missing +RUN apt-get install -qy \ + apache2-utils \ + apt-file \ + apt-utils \ + bash \ + build-essential \ + curl \ + enscript \ + gawk \ + gcc \ + ghostscript \ + git \ + gnupg \ + graphviz \ + jq \ + less \ + libcairo2-dev \ + libgtk2.0-0 \ + libgtk-3-0 \ + libnotify-dev \ + libgconf-2-4 
\ + libgbm-dev \ + libnss3 \ + libxss1 \ + libasound2 \ + libxtst6 \ + libmagic-dev \ + libmariadb-dev \ + locales \ + mariadb-client \ + netcat \ + nodejs \ + pigz \ + pv \ + python3-ipython \ + ripgrep \ + rsync \ + rsyslog \ + ruby \ + ruby-rubygems \ + unzip \ + wget \ + xauth \ + xvfb \ + yang-tools + +# Install kramdown-rfc2629 (ruby) +RUN gem install kramdown-rfc2629 + +# Get rid of installation files we don't need in the image, to reduce size +RUN apt-get clean && rm -rf /var/lib/apt/lists/* + +# Set locale to en_US.UTF-8 +RUN echo "LC_ALL=en_US.UTF-8" >> /etc/environment && \ + echo "en_US.UTF-8 UTF-8" >> /etc/locale.gen && \ + echo "LANG=en_US.UTF-8" > /etc/locale.conf && \ + dpkg-reconfigure locales && \ + locale-gen en_US.UTF-8 && \ + update-locale LC_ALL en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 + +# Install idnits +ADD https://raw.githubusercontent.com/ietf-tools/idnits-mirror/main/idnits /usr/local/bin/ +RUN chmod +rx /usr/local/bin/idnits + +# Install current datatracker python dependencies +COPY requirements.txt /tmp/pip-tmp/ +RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \ + && rm -rf /tmp/pip-tmp + +# Turn off rsyslog kernel logging (doesn't work in Docker) +RUN sed -i '/imklog/s/^/#/' /etc/rsyslog.conf + +# Fetch wait-for utility +ADD https://raw.githubusercontent.com/eficode/wait-for/v2.1.3/wait-for /usr/local/bin/ +RUN chmod +rx /usr/local/bin/wait-for + +# Create workspace +RUN mkdir -p /workspace +WORKDIR /workspace diff --git a/requirements.txt b/requirements.txt index 0d226199a0..25335502ee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ argon2-cffi>=21.3.0 # For the Argon2 password hasher option beautifulsoup4>=4.11.1 # Only used in tests bibtexparser>=0.6.2,<1.0 # Only used in tests. Version 1.0 doesn't work under python 2.7. 1.0.1 doesn't recognize month names or abbreviations. 
bleach>=5.0.0 +celery>=5.2.6 coverage>=4.5.4,<5.0 # Coverage 5.x moves from a json database to SQLite. Moving to 5.x will require substantial rewrites in ietf.utils.test_runner and ietf.release.views decorator>=5.1.1 defusedxml>=0.7.1 # for TastyPie when using xml; not a declared dependency From e5edba74fcae150e069dc4b80123bac8a38a311d Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Wed, 4 May 2022 16:00:37 -0300 Subject: [PATCH 02/99] ci: build celery worker on push to jennifer/celery branch --- .github/workflows/build-celery-worker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-celery-worker.yml b/.github/workflows/build-celery-worker.yml index b8c5c44efd..6301c6a5bd 100644 --- a/.github/workflows/build-celery-worker.yml +++ b/.github/workflows/build-celery-worker.yml @@ -3,7 +3,7 @@ name: Build Celery Worker Docker Image on: push: branches: - - 'main' + - 'jennifer/celery' paths: - 'requirements.txt' - 'dev/celery/Dockerfile' From 3e0d9bc1b45e5c5241a85e67f77448461686cbe2 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Wed, 4 May 2022 16:06:05 -0300 Subject: [PATCH 03/99] ci: also build celery worker for main branch --- .github/workflows/build-celery-worker.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-celery-worker.yml b/.github/workflows/build-celery-worker.yml index 6301c6a5bd..32c0119d8c 100644 --- a/.github/workflows/build-celery-worker.yml +++ b/.github/workflows/build-celery-worker.yml @@ -3,6 +3,7 @@ name: Build Celery Worker Docker Image on: push: branches: + - 'main' - 'jennifer/celery' paths: - 'requirements.txt' From a3fa42407733bfea1c077b4a028378a5a7b3010c Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Wed, 4 May 2022 16:08:37 -0300 Subject: [PATCH 04/99] ci: Add comment to celery Dockerfile --- dev/celery/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/celery/Dockerfile b/dev/celery/Dockerfile index 045e5668bb..9731785e46 100644 
--- a/dev/celery/Dockerfile +++ b/dev/celery/Dockerfile @@ -1,3 +1,5 @@ +# Dockerfile for celery worker +# FROM python:3.9-bullseye LABEL maintainer="IETF Tools Team " From 6c77c54731e3aa558a43174062cc6253ed9cea2b Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 5 May 2022 13:55:10 -0300 Subject: [PATCH 05/99] chore: first stab at a celery/rabbitmq docker-compose --- docker/docker-compose.celery.yml | 39 ++++++++++++++++++++++++++++++++ docker/rabbitmq.conf | 19 ++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 docker/docker-compose.celery.yml create mode 100644 docker/rabbitmq.conf diff --git a/docker/docker-compose.celery.yml b/docker/docker-compose.celery.yml new file mode 100644 index 0000000000..527a936e8b --- /dev/null +++ b/docker/docker-compose.celery.yml @@ -0,0 +1,39 @@ +version: '3.8' + +services: + mq: + image: rabbitmq:3-alpine + user: '${RABBITMQ_UID:?Must specify RABBITMQ_UID}' + hostname: datatracker-mq + deploy: + resources: + limits: + memory: 1gb # coordinate with settings in rabbitmq.conf + reservations: + memory: 512mb + ports: + - '${MQ_PORT:-5672}:5672' + volumes: + - /var/lib/rabbitmq + - ./rabbitmq.conf:/etc/rabbitmq/conf.d/90-ietf.conf + - ./definitions.json:/ietf-conf/definitions.json + restart: unless-stopped +# logging: +# driver: "syslog" +# options: +# syslog-address: "tcp://ietfa.amsl.com:514" + + celery: + image: ghcr.io/painless-security/datatracker-celery:latest + command: celery --app=ietf worker --loglevel=INFO + user: '${CELERY_UID:?Must specify CELERY_UID}' + volumes: + - ..:/workspace + - '${MYSQL_SOCKET_PATH:-/run/mysqld}:/run/mysqld' + depends_on: + - mq + restart: unless-stopped +# logging: +# driver: "syslog" +# options: +# syslog-address: "tcp://ietfa.amsl.com:514" diff --git a/docker/rabbitmq.conf b/docker/rabbitmq.conf new file mode 100644 index 0000000000..8603d0a4cb --- /dev/null +++ b/docker/rabbitmq.conf @@ -0,0 +1,19 @@ +# prevent guest from logging in over tcp +loopback_users.guest 
= true + +# load saved definitions +load_definitions = /ietf-conf/definitions.json + +# Ensure that enough disk is available to flush to disk. To do this, need to limit the +# memory available to the container to something reasonable. See +# https://www.rabbitmq.com/production-checklist.html#monitoring-and-resource-usage +# for recommendations. + +# 1-1.5 times the memory available to the container is adequate for disk limit +disk_free_limit.absolute = 1.5GB + +# This should be ~40% of the memory available to the container. Use an +# absolute number because relative will be proprtional to the full machine +# memory. +vm_memory_high_watermark.absolute = 400MB + From 048ca8565ca0342fe0f6f3e4836eddbd1ad25f82 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 5 May 2022 14:06:24 -0300 Subject: [PATCH 06/99] feat: add celery configuration and test task / endpoint --- ietf/__init__.py | 7 +++++++ ietf/celeryapp.py | 22 ++++++++++++++++++++++ ietf/settings.py | 6 ++++++ ietf/submit/tasks.py | 12 ++++++++++++ ietf/submit/urls.py | 3 +++ ietf/submit/views.py | 18 +++++++++++------- 6 files changed, 61 insertions(+), 7 deletions(-) create mode 100644 ietf/celeryapp.py create mode 100644 ietf/submit/tasks.py diff --git a/ietf/__init__.py b/ietf/__init__.py index 133a5d5abf..2338d0428b 100644 --- a/ietf/__init__.py +++ b/ietf/__init__.py @@ -16,3 +16,10 @@ # set this to ".p1", ".p2", etc. after patching __patch__ = "" + + +# This will make sure the app is always imported when +# Django starts so that shared_task will use this app. 
+from .celeryapp import app as celery_app + +__all__ = ('celery_app',) diff --git a/ietf/celeryapp.py b/ietf/celeryapp.py new file mode 100644 index 0000000000..cefde3a8d3 --- /dev/null +++ b/ietf/celeryapp.py @@ -0,0 +1,22 @@ +import os + +from celery import Celery + +# Set the default Django settings module for the 'celery' program +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'ietf.settings') + +app = Celery('ietf') + +# Using a string here means the worker doesn't have to serialize +# the configuration object to child processes. +# - namespace='CELERY' means all celery-related configuration keys +# should have a `CELERY_` prefix. +app.config_from_object('django.conf:settings', namespace='CELERY') + +# Load task modules from all registered Django apps. +app.autodiscover_tasks() + + +@app.task(bind=True) +def debug_task(self): + print(f'Request: {self.request!r}') diff --git a/ietf/settings.py b/ietf/settings.py index 692a0ff45a..a27ec23068 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -1169,6 +1169,12 @@ def skip_unreadable_post(record): DEFAULT_REQUESTS_TIMEOUT = 20 # seconds +# Celery configuration +CELERY_TIMEZONE = 'UTC' +CELERY_BROKER_URL = 'amqp://mq/' +CELERY_ACKS_LATE = True # failed tasks will be retried; keep tasks idempotent or disable per-task + + # Meetecho API setup: Uncomment this and provide real credentials to enable # Meetecho conference creation for interim session requests # diff --git a/ietf/submit/tasks.py b/ietf/submit/tasks.py new file mode 100644 index 0000000000..85a546fe71 --- /dev/null +++ b/ietf/submit/tasks.py @@ -0,0 +1,12 @@ +# Copyright The IETF Trust 2022, All Rights Reserved +# +# Celery task definitions +# +from celery import shared_task + +from ietf.utils import log + + +@shared_task(bind=True) +def poke(self): + log.log(f'Poked {self.name}, request id {self.request.id}') diff --git a/ietf/submit/urls.py b/ietf/submit/urls.py index 1d99f94070..2309ec55cd 100644 --- a/ietf/submit/urls.py +++ b/ietf/submit/urls.py 
@@ -25,4 +25,7 @@ url(r'^manualpost/email/(?P\d+)/(?P\d+)/(?P[a-f\d]*)/$', views.show_submission_email_message), url(r'^manualpost/replyemail/(?P\d+)/(?P\d+)/$', views.send_submission_email), url(r'^manualpost/sendemail/(?P\d+)/$', views.send_submission_email), + + # proof-of-concept for celery async tasks + url(r'^async-poke/?$', views.async_poke_test), ] \ No newline at end of file diff --git a/ietf/submit/views.py b/ietf/submit/views.py index 9eb303eb26..09b006c1d4 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -34,6 +34,7 @@ from ietf.submit.mail import send_full_url, send_manual_post_request, add_submission_email, get_reply_to from ietf.submit.models import (Submission, Preapproval, SubmissionExtResource, DraftSubmissionStateName, SubmissionEmailEvent ) +from ietf.submit.tasks import poke from ietf.submit.utils import ( approvable_submissions_for_user, preapprovals_for_user, recently_approved_by_user, validate_submission, create_submission_event, docevent_from_submission, post_submission, cancel_submission, rename_submission_files, remove_submission_files, get_draft_meta, @@ -175,7 +176,7 @@ def err(code, text): raise ValidationError('Submitter %s is not one of the document authors' % user.username) submission.submitter = user.person.formatted_email() - sent_to = accept_submission(request, submission) + sent_to = accept_submission(submission, request) return HttpResponse( "Upload of %s OK, confirmation requests sent to:\n %s" % (submission.name, ',\n '.join(sent_to)), @@ -365,13 +366,13 @@ def submission_status(request, submission_id, access_token=None): permission_denied(request, 'You do not have permission to perform this action') # go directly to posting submission - docevent_from_submission(request, submission, desc="Uploaded new revision") + docevent_from_submission(submission, desc="Uploaded new revision") desc = "Secretariat manually posting. 
Approvals already received" post_submission(request, submission, desc, desc) else: - accept_submission(request, submission, autopost=True) + accept_submission(submission, request, autopost=True) if access_token: return redirect("ietf.submit.views.submission_status", submission_id=submission.pk, access_token=access_token) @@ -698,9 +699,7 @@ def cancel_waiting_for_draft(request): create_submission_event(request, submission, "Cancelled submission") if (submission.rev != "00"): # Add a doc event - docevent_from_submission(request, - submission, - "Cancelled submission for rev {}".format(submission.rev)) + docevent_from_submission(submission, "Cancelled submission for rev {}".format(submission.rev)) return redirect("ietf.submit.views.manualpost") @@ -923,4 +922,9 @@ def get_submission_or_404(submission_id, access_token=None): if access_token and not key_matched: raise Http404 - return submission \ No newline at end of file + return submission + + +def async_poke_test(request): + result = poke.delay() + return HttpResponse(f'Poked {result}', content_type='text/plain') From 17972065db1ea230b85b55a1380a554cbb800bc3 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 5 May 2022 15:58:17 -0300 Subject: [PATCH 07/99] chore: run mq/celery containers for dev work --- dev/celery/Dockerfile | 6 ++++++ dev/celery/docker-init.sh | 12 ++++++++++++ docker-compose.yml | 12 ++++++++++++ docker/docker-compose.celery.yml | 6 +++++- docker/docker-compose.extend.yml | 3 +++ 5 files changed, 38 insertions(+), 1 deletion(-) create mode 100755 dev/celery/docker-init.sh diff --git a/dev/celery/Dockerfile b/dev/celery/Dockerfile index 9731785e46..4d084ccee2 100644 --- a/dev/celery/Dockerfile +++ b/dev/celery/Dockerfile @@ -88,3 +88,9 @@ RUN chmod +rx /usr/local/bin/wait-for # Create workspace RUN mkdir -p /workspace WORKDIR /workspace + +# Install endpoint script +COPY dev/celery/docker-init.sh /docker-init.sh +RUN chmod +rx /docker-init.sh + +ENTRYPOINT [ "/docker-init.sh" ] \ No 
newline at end of file diff --git a/dev/celery/docker-init.sh b/dev/celery/docker-init.sh new file mode 100755 index 0000000000..4245cff814 --- /dev/null +++ b/dev/celery/docker-init.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +WORKSPACEDIR="/workspace" + +cd "$WORKSPACEDIR" || exit 255 + +if [[ -n "${UPDATE_REQUIREMENTS}" && -r requirements.txt ]]; then + echo "Updating requirements..." + pip install --upgrade -r requirements.txt +fi + +celery --app="${CELERY_APP:-ietf}" worker "$@" diff --git a/docker-compose.yml b/docker-compose.yml index 3f2ff51fff..4d420e7338 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -64,5 +64,17 @@ services: # Add "forwardPorts": ["5432"] to **devcontainer.json** to forward PostgreSQL locally. # (Adding the "ports" property to this file will not forward from a Codespace.) + mq: + image: rabbitmq:3-alpine + restart: unless-stopped + + celery: + image: celery + environment: + CELERY_APP: ietf + UPDATE_REQUIREMENTS: 1 + command: + - '--loglevel=INFO' + volumes: mariadb-data: diff --git a/docker/docker-compose.celery.yml b/docker/docker-compose.celery.yml index 527a936e8b..97d7af67a5 100644 --- a/docker/docker-compose.celery.yml +++ b/docker/docker-compose.celery.yml @@ -25,7 +25,11 @@ services: celery: image: ghcr.io/painless-security/datatracker-celery:latest - command: celery --app=ietf worker --loglevel=INFO + environment: + CELERY_APP: ietf + # UPDATE_REQUIREMENTS: 1 # uncomment to update Python requirements on startup + command: + - '--loglevel=INFO' user: '${CELERY_UID:?Must specify CELERY_UID}' volumes: - ..:/workspace diff --git a/docker/docker-compose.extend.yml b/docker/docker-compose.extend.yml index 36d1a61e70..49735d8de3 100644 --- a/docker/docker-compose.extend.yml +++ b/docker/docker-compose.extend.yml @@ -11,3 +11,6 @@ services: db: ports: - '3306' + celery: + volumes: + - .:/workspace From aa18578a277eda68c0efeca14d118ea5cd11d68b Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 5 May 2022 16:12:45 -0300 
Subject: [PATCH 08/99] chore: point to ghcr.io image for celery worker --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 4d420e7338..e0d8a6f182 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -69,7 +69,7 @@ services: restart: unless-stopped celery: - image: celery + image: ghcr.io/painless-security/datatracker-celery:latest environment: CELERY_APP: ietf UPDATE_REQUIREMENTS: 1 From 7012bed860916a019b6a5a02f5dae6a0077a641c Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Mon, 9 May 2022 12:49:19 -0300 Subject: [PATCH 09/99] refactor: move XML parsing duties into XMLDraft Move some PlaintextDraft methods into the Draft base class and implement for the XMLDraft class. Use xml2rfc code from ietf.submit as a model for the parsing. This leaves some mismatch between the PlaintextDraft and the Draft class spec for the get_author_list() method to be resolved. --- ietf/utils/draft.py | 25 ++++++++++++++++ ietf/utils/xmldraft.py | 68 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/ietf/utils/draft.py b/ietf/utils/draft.py index 78705637fc..0b53313c36 100755 --- a/ietf/utils/draft.py +++ b/ietf/utils/draft.py @@ -143,17 +143,36 @@ def get_abstract(self): raise NotImplementedError def get_author_list(self): + """Get detailed author list + + Returns a list of dicts with the following keys: + full_name, first_name, middle_initial, last_name, + name_suffix, email, country, company + Values will be None if not available + """ raise NotImplementedError def get_authors(self): + """Get simple author list + + Get as list of strings with author name and email within angle brackets + """ raise NotImplementedError def get_authors_with_firm(self): + """Get simple list of authors with firm (company) info + + Get as list of strings with author name and email within angle brackets and + company in parentheses + """ raise 
NotImplementedError def get_creation_date(self): raise NotImplementedError + def get_draftname(self): + raise NotImplementedError + def get_formal_languages(self): raise NotImplementedError @@ -357,6 +376,10 @@ def begpage(pages, page, newpage, line=None): _debug('pages: %s' % len(pages)) return stripped, pages + # ---------------------------------------------------------------------- + def get_draftname(self): + return self.filename + # ---------------------------------------------------------------------- def get_pagecount(self): if self._pagecount == None: @@ -559,6 +582,8 @@ def get_authors_with_firm(self): def get_author_list(self): # () -> List[List[str, str, str, str, str, str, str]] """Returns a list of tuples, with each tuple containing (given_names, surname, email, company). Email will be None if unknown. + + Todo update to agree with superclass method signature """ if self._author_info == None: self.extract_authors() diff --git a/ietf/utils/xmldraft.py b/ietf/utils/xmldraft.py index 133a766d1a..cc9d90f59d 100644 --- a/ietf/utils/xmldraft.py +++ b/ietf/utils/xmldraft.py @@ -1,5 +1,6 @@ # Copyright The IETF Trust 2021, All Rights Reserved # -*- coding: utf-8 -*- +import io import os import xml2rfc @@ -33,7 +34,9 @@ def parse_xml(filename): orig_write_out = xml2rfc.log.write_out orig_write_err = xml2rfc.log.write_err orig_xml_library = os.environ.get('XML_LIBRARY', None) - tree = None + parser_out = io.StringIO() + parser_err = io.StringIO() + with ExitStack() as stack: @stack.callback def cleanup(): # called when context exited, even if there's an exception @@ -43,12 +46,16 @@ def cleanup(): # called when context exited, even if there's an exception if orig_xml_library is not None: os.environ['XML_LIBRARY'] = orig_xml_library - xml2rfc.log.write_out = open(os.devnull, 'w') - xml2rfc.log.write_err = open(os.devnull, 'w') + xml2rfc.log.write_out = parser_out + xml2rfc.log.write_err = parser_err os.environ['XML_LIBRARY'] = settings.XML_LIBRARY parser = 
xml2rfc.XmlRfcParser(filename, quiet=True) - tree = parser.parse() + try: + tree = parser.parse() + except Exception as e: + raise XMLParseError(parser_out.getvalue(), parser_err.getvalue()) from e + xml_version = tree.getroot().get('version', '2') if xml_version == '2': v2v3 = xml2rfc.V2v3XmlWriter(tree) @@ -83,6 +90,48 @@ def _reference_section_name(self, section_elt): section_name = section_elt.get('title') # fall back to title if we have it return section_name + def get_draftname(self): + return self.xmlroot.attrib.get('docName') + + def get_title(self): + return self.xmlroot.findtext('front/title').strip() + + def get_abstract(self): + abstract = self.xmlroot.findtext('front/abstract') + return abstract.strip() if abstract else '' + + def get_author_list(self): + """Get detailed author list + + Returns a list of dicts with the following keys: + name, first_name, middle_initial, last_name, + name_suffix, email, country, affiliation + Values will be None if not available + """ + result = [] + empty_author = { + k: None for k in [ + 'name', 'first_name', 'middle_initial', 'last_name', + 'name_suffix', 'email', 'country', 'affiliation', + ] + } + + for author in self.xmlroot.findall('front/author'): + info = { + 'name': author.attrib.get('fullname'), + 'email': author.findtext('address/email'), + 'affiliation': author.findtext('organization'), + } + elem = author.find('address/postal/country') + if elem is not None: + ascii_country = elem.get('ascii', None) + info['country'] = ascii_country if ascii_country else elem.text + for item in info: + if info[item]: + info[item] = info[item].strip() + result.append(empty_author | info) # merge, preferring info + return result + def get_refs(self): """Extract references from the draft""" refs = {} @@ -92,3 +141,14 @@ def get_refs(self): for ref in (section.findall('./reference') + section.findall('./referencegroup')): refs[self._document_name(ref.get('anchor'))] = ref_type return refs + + +class XMLParseError(Exception): + 
"""An error occurred while parsing""" + def __init__(self, out: str, err: str, *args): + super().__init__(*args) + self._out = out + self._err = err + + def parser_msgs(self): + return self._out.splitlines() + self._err.splitlines() From 2cc91c0b6078fc226d1ff0349d12ee98e0eff001 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Mon, 9 May 2022 13:21:52 -0300 Subject: [PATCH 10/99] feat: add api_upload endpoint and beginnings of async processing This adds an api_upload() that behaves analogously to the api_submit() endpoint. Celery tasks to handle asynchronous processing are added but are not yet functional enough to be useful. --- ietf/api/urls.py | 2 + ietf/submit/forms.py | 322 ++++++++++++++++++++++++++++++++++++++++++- ietf/submit/mail.py | 4 +- ietf/submit/tasks.py | 55 +++++++- ietf/submit/utils.py | 129 +++++++++++++---- ietf/submit/views.py | 95 +++++++++++++ ietf/utils/mail.py | 5 +- ietf/utils/text.py | 1 + 8 files changed, 574 insertions(+), 39 deletions(-) diff --git a/ietf/api/urls.py b/ietf/api/urls.py index ee1779a09b..65a51cd0f4 100644 --- a/ietf/api/urls.py +++ b/ietf/api/urls.py @@ -38,6 +38,8 @@ url(r'^openid/', include('oidc_provider.urls', namespace='oidc_provider')), # Draft submission API url(r'^submit/?$', submit_views.api_submit), + # Draft upload API + url(r'^upload/?$', submit_views.api_upload), # Datatracker version url(r'^version/?$', api_views.version), # Application authentication API key diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index 06dfd9bbc3..120ed0933a 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -40,12 +40,13 @@ from ietf.utils import log from ietf.utils.draft import PlaintextDraft from ietf.utils.text import normalize_text +from ietf.utils.xmldraft import XMLDraft, XMLParseError -class SubmissionBaseUploadForm(forms.Form): +class DeprecatedSubmissionBaseUploadForm(forms.Form): xml = forms.FileField(label='.xml format', required=True) def __init__(self, request, *args, **kwargs): - 
super(SubmissionBaseUploadForm, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.remote_ip = remote_ip(request) @@ -390,7 +391,7 @@ def cleanup(): # called when context exited, even in case of exception settings.IDSUBMIT_MAX_DAILY_SUBMISSIONS, settings.IDSUBMIT_MAX_DAILY_SUBMISSIONS_SIZE, ) - return super(SubmissionBaseUploadForm, self).clean() + return super().clean() def check_submissions_tresholds(self, which, filter_kwargs, max_amount, max_size): submissions = Submission.objects.filter(**filter_kwargs) @@ -447,7 +448,320 @@ def deduce_group(self): raise forms.ValidationError('Draft names starting with draft-%s- are restricted, please pick a differen name' % ntype) return None -class SubmissionManualUploadForm(SubmissionBaseUploadForm): +class SubmissionBaseUploadForm(forms.Form): + xml = forms.FileField(label='.xml format', required=True) + + def __init__(self, request, *args, **kwargs): + super(SubmissionBaseUploadForm, self).__init__(*args, **kwargs) + + self.remote_ip = remote_ip(request) + + self.request = request + self.in_first_cut_off = False + self.cutoff_warning = "" + self.shutdown = False + self.set_cutoff_warnings() + + self.group = None + self.filename = None + self.revision = None + self.title = None + self.abstract = None + self.authors = [] + self.parsed_draft = None + self.file_types = [] + self.file_info = {} # indexed by file field name, e.g., 'txt', 'xml', ... + self.xml_version = None + # No code currently (14 Sep 2017) uses this class directly; it is + # only used through its subclasses. The two assignments below are + # set to trigger an exception if it is used directly only to make + # sure that adequate consideration is made if it is decided to use it + # directly in the future. 
Feel free to set these appropriately to + # avoid the exceptions in that case: + self.formats = None # None will raise an exception in clean() if this isn't changed in a subclass + self.base_formats = None # None will raise an exception in clean() if this isn't changed in a subclass + + def set_cutoff_warnings(self): + now = datetime.datetime.now(pytz.utc) + meeting = Meeting.get_current_meeting() + if not meeting: + return + # + cutoff_00 = meeting.get_00_cutoff() + cutoff_01 = meeting.get_01_cutoff() + reopen = meeting.get_reopen_time() + # + cutoff_00_str = cutoff_00.strftime("%Y-%m-%d %H:%M %Z") + cutoff_01_str = cutoff_01.strftime("%Y-%m-%d %H:%M %Z") + reopen_str = reopen.strftime("%Y-%m-%d %H:%M %Z") + + # Workaround for IETF107. This would be better handled by a refactor that allowed meetings to have no cutoff period. + if cutoff_01 >= reopen: + return + + if cutoff_00 == cutoff_01: + if now.date() >= (cutoff_00.date() - meeting.idsubmit_cutoff_warning_days) and now.date() < cutoff_00.date(): + self.cutoff_warning = ( 'The last submission time for Internet-Drafts before %s is %s.

' % (meeting, cutoff_00_str)) + elif now <= cutoff_00: + self.cutoff_warning = ( + 'The last submission time for new Internet-Drafts before the meeting is %s.
' + 'After that, you will not be able to submit drafts until after %s (IETF-meeting local time)' % (cutoff_00_str, reopen_str, )) + else: + if now.date() >= (cutoff_00.date() - meeting.idsubmit_cutoff_warning_days) and now.date() < cutoff_00.date(): + self.cutoff_warning = ( 'The last submission time for new documents (i.e., version -00 Internet-Drafts) before %s is %s.

' % (meeting, cutoff_00_str) + + 'The last submission time for revisions to existing documents before %s is %s.
' % (meeting, cutoff_01_str) ) + elif now.date() >= cutoff_00.date() and now <= cutoff_01: + # We are in the first_cut_off + if now < cutoff_00: + self.cutoff_warning = ( + 'The last submission time for new documents (i.e., version -00 Internet-Drafts) before the meeting is %s.
' + 'After that, you will not be able to submit a new document until after %s (IETF-meeting local time)' % (cutoff_00_str, reopen_str, )) + else: # No 00 version allowed + self.cutoff_warning = ( + 'The last submission time for new documents (i.e., version -00 Internet-Drafts) was %s.
' + 'You will not be able to submit a new document until after %s (IETF-meeting local time).

' + 'You can still submit a version -01 or higher Internet-Draft until %s' % (cutoff_00_str, reopen_str, cutoff_01_str, )) + self.in_first_cut_off = True + if now > cutoff_01 and now < reopen: + self.cutoff_warning = ( + 'The last submission time for the I-D submission was %s.

' + 'The I-D submission tool will be reopened after %s (IETF-meeting local time).' % (cutoff_01_str, reopen_str)) + self.shutdown = True + + def clean_file(self, field_name, parser_class): + f = self.cleaned_data[field_name] + if not f: + return f + + self.file_info[field_name] = parser_class(f).critical_parse() + if self.file_info[field_name].errors: + raise forms.ValidationError(self.file_info[field_name].errors) + return f + + def clean_xml(self): + return self.clean_file("xml", XMLParser) + + def clean(self): + def format_messages(where, e, log_msgs): + m = str(e) + if m: + m = [m] + else: + import traceback + typ, val, tb = sys.exc_info() + m = traceback.format_exception(typ, val, tb) + m = [ l.replace('\n ', ':\n ') for l in m ] + msgs = [s for s in (["Error from xml2rfc (%s):" % (where,)] + m + log_msgs) if s] + return msgs + + if self.shutdown and not has_role(self.request.user, "Secretariat"): + raise forms.ValidationError('The submission tool is currently shut down') + + for ext in self.formats: + f = self.cleaned_data.get(ext, None) + if not f: + continue + self.file_types.append('.%s' % ext) + if not ('.txt' in self.file_types or '.xml' in self.file_types): + if not self.errors: + raise forms.ValidationError('Unexpected submission file types; found %s, but %s is required' % (', '.join(self.file_types), ' or '.join(self.base_formats))) + + if self.cleaned_data.get('xml'): + xml_file = self.cleaned_data.get('xml') + tfn = None + with ExitStack() as stack: + @stack.callback + def cleanup(): # called when context exited, even in case of exception + if tfn is not None: + os.unlink(tfn) + + # We need to write the xml file to disk in order to hand it + # over to the xml parser. XXX FIXME: investigate updating + # xml2rfc to be able to work with file handles to in-memory + # files. 
+ name, ext = os.path.splitext(os.path.basename(xml_file.name)) + with tempfile.NamedTemporaryFile(prefix=name+'-', + suffix='.xml', + mode='wb+', + delete=False) as tf: + tfn = tf.name + for chunk in xml_file.chunks(): + tf.write(chunk) + + try: + xml_draft = XMLDraft(tfn) + except XMLParseError as e: + msgs = format_messages('txt', e, e.parser_msgs()) + log.log('\n'.join(msgs)) + self.add_error('xml', msgs) + # todo other error handling??? + + draftname = xml_draft.get_draftname() + if draftname is None: + self.add_error('xml', "No docName attribute found in the xml root element") + name_error = validate_submission_name(draftname) + if name_error: + self.add_error('xml', name_error) # This is a critical and immediate failure - do not proceed with other validation. + else: + revmatch = re.search("-[0-9][0-9]$", draftname) + if revmatch: + self.revision = draftname[-2:] + self.filename = draftname[:-3] + else: + self.revision = None + self.filename = draftname + self.authors = xml_draft.get_author_list() + self.title = xml_draft.get_title() + + # The following errors are likely noise if we have previous field + # errors: + if self.errors: + raise forms.ValidationError('') + + if self.cleaned_data.get('txt'): + # try to parse it + txt_file = self.cleaned_data['txt'] + txt_file.seek(0) + bytes = txt_file.read() + txt_file.seek(0) + try: + text = bytes.decode(self.file_info['txt'].charset) + # + self.parsed_draft = PlaintextDraft(text, txt_file.name) + if self.filename == None: + self.filename = self.parsed_draft.filename + elif self.filename != self.parsed_draft.filename: + self.add_error('txt', "Inconsistent name information: xml:%s, txt:%s" % (self.filename, self.parsed_draft.filename)) + if self.revision == None: + self.revision = self.parsed_draft.revision + elif self.revision != self.parsed_draft.revision: + self.add_error('txt', "Inconsistent revision information: xml:%s, txt:%s" % (self.revision, self.parsed_draft.revision)) + except (UnicodeDecodeError, 
LookupError) as e: + self.add_error('txt', 'Failed decoding the uploaded file: "%s"' % str(e)) + + rev_error = validate_submission_rev(self.filename, self.revision) + if rev_error: + raise forms.ValidationError(rev_error) + + # The following errors are likely noise if we have previous field + # errors: + if self.errors: + raise forms.ValidationError('') + + if not self.filename: + raise forms.ValidationError("Could not extract a valid draft name from the upload. " + "To fix this in a text upload, please make sure that the full draft name including " + "revision number appears centered on its own line below the document title on the " + "first page. In an xml upload, please make sure that the top-level " + "element has a docName attribute which provides the full draft name including " + "revision number.") + + if not self.revision: + raise forms.ValidationError("Could not extract a valid draft revision from the upload. " + "To fix this in a text upload, please make sure that the full draft name including " + "revision number appears centered on its own line below the document title on the " + "first page. In an xml upload, please make sure that the top-level " + "element has a docName attribute which provides the full draft name including " + "revision number.") + + if not self.title: + raise forms.ValidationError("Could not extract a valid title from the upload") + + if self.cleaned_data.get('txt') or self.cleaned_data.get('xml'): + # check group + self.group = self.deduce_group() + # check existing + existing = Submission.objects.filter(name=self.filename, rev=self.revision).exclude(state__in=("posted", "cancel", "waiting-for-draft")) + if existing: + raise forms.ValidationError(mark_safe('A submission with same name and revision is currently being processed. Check the status here.' 
% urlreverse("ietf.submit.views.submission_status", kwargs={ 'submission_id': existing[0].pk }))) + + # cut-off + if self.revision == '00' and self.in_first_cut_off: + raise forms.ValidationError(mark_safe(self.cutoff_warning)) + # check thresholds + today = datetime.date.today() + + self.check_submissions_thresholds( + "for the draft %s" % self.filename, + dict(name=self.filename, rev=self.revision, submission_date=today), + settings.IDSUBMIT_MAX_DAILY_SAME_DRAFT_NAME, settings.IDSUBMIT_MAX_DAILY_SAME_DRAFT_NAME_SIZE, + ) + self.check_submissions_thresholds( + "for the same submitter", + dict(remote_ip=self.remote_ip, submission_date=today), + settings.IDSUBMIT_MAX_DAILY_SAME_SUBMITTER, settings.IDSUBMIT_MAX_DAILY_SAME_SUBMITTER_SIZE, + ) + if self.group: + self.check_submissions_thresholds( + "for the group \"%s\"" % (self.group.acronym), + dict(group=self.group, submission_date=today), + settings.IDSUBMIT_MAX_DAILY_SAME_GROUP, settings.IDSUBMIT_MAX_DAILY_SAME_GROUP_SIZE, + ) + self.check_submissions_thresholds( + "across all submitters", + dict(submission_date=today), + settings.IDSUBMIT_MAX_DAILY_SUBMISSIONS, settings.IDSUBMIT_MAX_DAILY_SUBMISSIONS_SIZE, + ) + return super().clean() + + def check_submissions_thresholds(self, which, filter_kwargs, max_amount, max_size): + submissions = Submission.objects.filter(**filter_kwargs) + + if len(submissions) > max_amount: + raise forms.ValidationError("Max submissions %s has been reached for today (maximum is %s submissions)." % (which, max_amount)) + if sum(s.file_size for s in submissions if s.file_size) > max_size * 1024 * 1024: + raise forms.ValidationError("Max uploaded amount %s has been reached for today (maximum is %s MB)." 
% (which, max_size)) + + def deduce_group(self): + """Figure out group from name or previously submitted draft, returns None if individual.""" + name = self.filename + existing_draft = Document.objects.filter(name=name, type="draft") + if existing_draft: + group = existing_draft[0].group + if group and group.type_id not in ("individ", "area"): + return group + else: + return None + else: + name_parts = name.split("-") + if len(name_parts) < 3: + raise forms.ValidationError("The draft name \"%s\" is missing a third part, please rename it" % name) + + if name.startswith('draft-ietf-') or name.startswith("draft-irtf-"): + if name_parts[1] == "ietf": + group_type = "wg" + elif name_parts[1] == "irtf": + group_type = "rg" + else: + group_type = None + + # first check groups with dashes + for g in Group.objects.filter(acronym__contains="-", type=group_type): + if name.startswith('draft-%s-%s-' % (name_parts[1], g.acronym)): + return g + + try: + return Group.objects.get(acronym=name_parts[2], type=group_type) + except Group.DoesNotExist: + raise forms.ValidationError('There is no active group with acronym \'%s\', please rename your draft' % name_parts[2]) + + elif name.startswith("draft-rfc-"): + return Group.objects.get(acronym="iesg") + elif name.startswith("draft-rfc-editor-") or name.startswith("draft-rfced-") or name.startswith("draft-rfceditor-"): + return Group.objects.get(acronym="rfceditor") + else: + ntype = name_parts[1].lower() + # This covers group types iesg, iana, iab, ise, and others: + if GroupTypeName.objects.filter(slug=ntype).exists(): + group = Group.objects.filter(acronym=ntype).first() + if group: + return group + else: + raise forms.ValidationError('Draft names starting with draft-%s- are restricted, please pick a differen name' % ntype) + return None + + +class SubmissionManualUploadForm(DeprecatedSubmissionBaseUploadForm): xml = forms.FileField(label='.xml format', required=False) # xml field with required=False instead of True txt = 
forms.FileField(label='.txt format', required=False) # We won't permit html upload until we can verify that the content diff --git a/ietf/submit/mail.py b/ietf/submit/mail.py index 35ea726e85..16d1f09734 100644 --- a/ietf/submit/mail.py +++ b/ietf/submit/mail.py @@ -307,9 +307,7 @@ def add_submission_email(request, remote_ip, name, rev, submission_pk, message, create_submission_event(request, submission, desc) - docevent_from_submission(request, - submission, - desc) + docevent_from_submission(submission, desc) except Exception as e: log("Exception: %s\n" % e) raise diff --git a/ietf/submit/tasks.py b/ietf/submit/tasks.py index 85a546fe71..00e25c1094 100644 --- a/ietf/submit/tasks.py +++ b/ietf/submit/tasks.py @@ -2,11 +2,64 @@ # # Celery task definitions # -from celery import shared_task +from celery import chain, shared_task +from pathlib import Path +from django.conf import settings +from django.utils.module_loading import import_string + +from ietf.submit.models import Submission +from ietf.submit import utils from ietf.utils import log +@shared_task +def apply_checker(checker_path, submission_id): + try: + checker_class = import_string(checker_path) + except ImportError: + # todo fail + raise + submission = Submission.objects.get(pk=submission_id) + + basename = Path(settings.IDSUBMIT_STAGING_PATH) / f'{submission.name}-{submission.rev}' + utils.apply_checker( + checker_class(), + submission, + { + ext: basename.with_suffix(f'.{ext}') + for ext in ['xml', 'txt', 'html'] + } + ) + + +@shared_task +def accept_submission(submission_id): + submission = Submission.objects.get(pk=submission_id) + errors = [c.message for c in submission.checks.filter(passed__isnull=False) if not c.passed] + if errors: + # utils.remove_submission_files(submission) + Submission.objects.filter(pk=submission_id).update(state_id='cancel') + return 'egad' + else: + utils.accept_submission(submission) + return 'yippie' + + +def check_and_accept_submission(submission_id): + checks = [ + 
apply_checker.si(checker_path, submission_id) + for checker_path in settings.IDSUBMIT_CHECKER_CLASSES + ] + return chain(*checks, accept_submission.si(submission_id)) + + +@shared_task +def render_missing_formats(submission_id): + submission = Submission.objects.get(pk=submission_id) + utils.render_missing_formats(submission) + + @shared_task(bind=True) def poke(self): log.log(f'Poked {self.name}, request id {self.request.id}') diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index 429fea58e8..1476750ae6 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -8,8 +8,9 @@ import pathlib import re import time +import xml2rfc -from typing import Callable, Optional # pyflakes:ignore +from typing import Optional # pyflakes:ignore from django.conf import settings from django.core.exceptions import ValidationError @@ -209,8 +210,8 @@ def create_submission_event(request, submission, desc): SubmissionEvent.objects.create(submission=submission, by=by, desc=desc) -def docevent_from_submission(request, submission, desc, who=None): - # type: (HttpRequest, Submission, str, Optional[Person]) -> Optional[DocEvent] +def docevent_from_submission(submission, desc, who=None): + # type: (Submission, str, Optional[Person]) -> Optional[DocEvent] log.assertion('who is None or isinstance(who, Person)') try: @@ -814,6 +815,10 @@ def turn_into_unicode(s): def get_submission(form): + # See if there is a Submission in state waiting-for-draft + # for this revision. + # If so - we're going to update it otherwise we create a new object + submissions = Submission.objects.filter(name=form.filename, rev=form.revision, state_id = "waiting-for-draft").distinct() @@ -827,32 +832,29 @@ def get_submission(form): def fill_in_submission(form, submission, authors, abstract, file_size): - # See if there is a Submission in state waiting-for-draft - # for this revision. 
- # If so - we're going to update it otherwise we create a new object - submission.state = DraftSubmissionStateName.objects.get(slug="uploaded") submission.remote_ip = form.remote_ip submission.title = form.title submission.abstract = abstract - submission.pages = form.parsed_draft.get_pagecount() - submission.words = form.parsed_draft.get_wordcount() submission.authors = authors - submission.first_two_pages = ''.join(form.parsed_draft.pages[:2]) submission.file_size = file_size submission.file_types = ','.join(form.file_types) submission.xml_version = form.xml_version submission.submission_date = datetime.date.today() - submission.document_date = form.parsed_draft.get_creation_date() submission.replaces = "" - + # todo think through whether to do this + if form.parsed_draft is not None: + submission.pages = form.parsed_draft.get_pagecount() + submission.words = form.parsed_draft.get_wordcount() + submission.first_two_pages = ''.join(form.parsed_draft.pages[:2]) + submission.document_date = form.parsed_draft.get_creation_date() submission.save() - submission.formal_languages.set(FormalLanguageName.objects.filter(slug__in=form.parsed_draft.get_formal_languages())) + if form.parsed_draft is not None: + submission.formal_languages.set(FormalLanguageName.objects.filter(slug__in=form.parsed_draft.get_formal_languages())) set_extresources_from_existing_draft(submission) -def apply_checkers(submission, file_name): - # run submission checkers +def apply_checker(checker, submission, file_name): def apply_check(submission, checker, method, fn): func = getattr(checker, method) passed, message, errors, warnings, info = func(fn) @@ -860,18 +862,21 @@ def apply_check(submission, checker, method, fn): message=message, errors=errors, warnings=warnings, items=info, symbol=checker.symbol) check.save() + # ordered list of methods to try + for method in ("check_fragment_xml", "check_file_xml", "check_fragment_txt", "check_file_txt", ): + ext = method[-3:] + if hasattr(checker, method) 
and ext in file_name: + apply_check(submission, checker, method, file_name[ext]) + break +def apply_checkers(submission, file_name): + # run submission checkers mark = time.time() for checker_path in settings.IDSUBMIT_CHECKER_CLASSES: lap = time.time() checker_class = import_string(checker_path) checker = checker_class() - # ordered list of methods to try - for method in ("check_fragment_xml", "check_file_xml", "check_fragment_txt", "check_file_txt", ): - ext = method[-3:] - if hasattr(checker, method) and ext in file_name: - apply_check(submission, checker, method, file_name[ext]) - break + apply_checker(checker, submission, file_name) tau = time.time() - lap log.log(f"ran {checker.__class__.__name__} ({tau:.3}s) for {file_name}") tau = time.time() - mark @@ -892,7 +897,74 @@ def accept_submission_requires_group_approval(submission): and not Preapproval.objects.filter(name=submission.name).exists() ) -def accept_submission(request, submission, autopost=False): + +class SubmissionError(Exception): + pass + + +def staging_path(filename, revision, ext): + return pathlib.Path(settings.IDSUBMIT_STAGING_PATH) / f'{filename}-{revision}{ext}' + + +def render_missing_formats(submission): + """Generate txt and html formats from xml draft + + todo allow for forms that have already been uploaded + """ + # todo check timestamps?? + xml2rfc.log.write_out = io.StringIO() # open(os.devnull, "w") + xml2rfc.log.write_err = io.StringIO() # open(os.devnull, "w") + os.environ["XML_LIBRARY"] = settings.XML_LIBRARY + xml_path = staging_path(submission.name, submission.rev, '.xml') + parser = xml2rfc.XmlRfcParser(str(xml_path), quiet=True) + # --- Parse the xml --- + xmltree = parser.parse(remove_comments=False) + # If we have v2, run it through v2v3. Keep track of the submitted version, though. 
+ xmlroot = xmltree.getroot() + xml_version = xmlroot.get('version', '2') + if xml_version == '2': + v2v3 = xml2rfc.V2v3XmlWriter(xmltree) + xmltree.tree = v2v3.convert2to3() + + # --- Prep the xml --- + prep = xml2rfc.PrepToolWriter(xmltree, quiet=True, liberal=True, keep_pis=[xml2rfc.V3_PI_TARGET]) + prep.options.accept_prepped = True + xmltree.tree = prep.prep() + if xmltree.tree == None: + raise SubmissionError(f'Error from xml2rfc (prep): {prep.errors}') + + # --- Convert to txt --- + txt_path = staging_path(submission.name, submission.rev, '.txt') + if not txt_path.exists(): + writer = xml2rfc.TextWriter(xmltree, quiet=True) + writer.options.accept_prepped = True + writer.write(txt_path) + log.log( + 'In %s: xml2rfc %s generated %s from %s (version %s)' % ( + str(xml_path.parent), + xml2rfc.__version__, + txt_path.name, + xml_path.name, + xml_version, + ) + ) + + # --- Convert to html --- + html_path = staging_path(submission.name, submission.rev, '.html') + writer = xml2rfc.HtmlWriter(xmltree, quiet=True) + writer.write(str(html_path)) + log.log( + 'In %s: xml2rfc %s generated %s from %s (version %s)' % ( + str(xml_path.parent), + xml2rfc.__version__, + html_path.name, + xml_path.name, + xml_version, + ) + ) + + +def accept_submission(submission: Submission, request: Optional[HttpRequest] = None, autopost=False): """Accept a submission and post or put in correct state to await approvals If autopost is True, will post draft if submitter is authorized to do so. @@ -903,17 +975,14 @@ def accept_submission(request, submission, autopost=False): for author in submission.authors ] # Is the user authenticated as an author who can approve this submission? 
user_is_author = ( - request.user.is_authenticated + request is not None + and request.user.is_authenticated and request.user.person in (prev_authors if submission.rev != '00' else curr_authors) # type: ignore ) # If "who" is None, docevent_from_submission will pull it out of submission - docevent_from_submission( - request, - submission, - desc="Uploaded new revision", - who=request.user.person if user_is_author else None, - ) + docevent_from_submission(submission, desc="Uploaded new revision", + who=request.user.person if user_is_author else None) replaces = DocAlias.objects.filter(name__in=submission.replaces_names) pretty_replaces = '(none)' if not replaces else ( @@ -1008,7 +1077,7 @@ def accept_submission(request, submission, autopost=False): if sub_event_desc: create_submission_event(request, submission, sub_event_desc) if docevent_desc: - docevent_from_submission(request, submission, docevent_desc, who=Person.objects.get(name="(System)")) + docevent_from_submission(submission, docevent_desc, who=Person.objects.get(name="(System)")) return address_list diff --git a/ietf/submit/views.py b/ietf/submit/views.py index 09b006c1d4..dbb66fd94f 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -107,6 +107,101 @@ def upload_submission(request): {'selected': 'index', 'form': form}) +@csrf_exempt +def api_upload(request): + def err(code, text): + return HttpResponse(text, status=code, content_type='text/plain') + + if request.method == 'GET': + return render(request, 'submit/api_submit_info.html') + elif request.method == 'POST': + exception = None + submission = None + try: + debug.mark() + form = SubmissionAutoUploadForm(request, data=request.POST, files=request.FILES) + if form.is_valid(): + log('got valid submission form for %s' % form.filename) + username = form.cleaned_data['user'] + user = User.objects.filter(username=username) + if user.count() == 0: + # See if a secondary login was being used + email = Email.objects.filter(address=username, 
active=True) + # The error messages don't talk about 'email', as the field we're + # looking at is still the 'username' field. + if email.count() == 0: + return err(400, "No such user: %s" % username) + elif email.count() > 1: + return err(500, "Multiple matching accounts for %s" % username) + email = email.first() + if not hasattr(email, 'person'): + return err(400, "No person matches %s" % username) + person = email.person + if not hasattr(person, 'user'): + return err(400, "No user matches: %s" % username) + user = person.user + elif user.count() > 1: + return err(500, "Multiple matching accounts for %s" % username) + else: + user = user.first() + if not hasattr(user, 'person'): + return err(400, "No person with username %s" % username) + + saved_files = save_files(form) + + # todo sort out author parsing - this only works for xml drafts + authors = form.authors + for a in authors: + if not a['email']: + raise ValidationError("Missing email address for author %s" % a) + + submission = get_submission(form) + fill_in_submission(form, submission, authors, '', None) + create_submission_event(request, submission, desc="Uploaded unchecked submission") + + # must do this after validate_submission() or data needed for check may be invalid + if check_submission_revision_consistency(submission): + return err( 409, "Submission failed due to a document revision inconsistency error " + "in the database. 
Please contact the secretariat for assistance.") + + author_emails = [a['email'].lower() for a in authors] + if not any( + email.address.lower() in author_emails + for email in user.person.email_set.filter(active=True) + ): + raise ValidationError('Submitter %s is not one of the document authors' % user.username) + + submission.submitter = user.person.formatted_email() + submission.save() + + from .tasks import check_and_accept_submission, render_missing_formats + ( + render_missing_formats.si(submission.pk) + | check_and_accept_submission(submission.pk) + ).delay() + + return HttpResponse( + f'Upload of {submission.name} OK, validation and acceptance pending', + content_type="text/plain") + else: + raise ValidationError(form.errors) + except IOError as e: + exception = e + return err(500, "IO Error: %s" % str(e)) + except ValidationError as e: + exception = e + return err(400, "Validation Error: %s" % str(e)) + except Exception as e: + exception = e + raise + return err(500, "Exception: %s" % str(e)) + finally: + if exception and submission: + remove_submission_files(submission) + submission.delete() + else: + return err(405, "Method not allowed") + @csrf_exempt def api_submit(request): "Automated submission entrypoint" diff --git a/ietf/utils/mail.py b/ietf/utils/mail.py index c18981d41f..4a3e7e3188 100644 --- a/ietf/utils/mail.py +++ b/ietf/utils/mail.py @@ -191,7 +191,10 @@ def encode_message(txt): return MIMEText(txt.encode('utf-8'), 'plain', 'UTF-8') def send_mail_text(request, to, frm, subject, txt, cc=None, extra=None, toUser=False, bcc=None, copy=True, save=True): - """Send plain text message.""" + """Send plain text message. 
+ + request can be None unless it is needed by the template + """ msg = encode_message(txt) return send_mail_mime(request, to, frm, subject, msg, cc, extra, toUser, bcc, copy=copy, save=save) diff --git a/ietf/utils/text.py b/ietf/utils/text.py index 5239419e8f..a47a4cae66 100644 --- a/ietf/utils/text.py +++ b/ietf/utils/text.py @@ -203,6 +203,7 @@ def unwrap(s): return s.replace('\n', ' ') def normalize_text(s): + """Normalize various unicode whitespaces to ordinary spaces""" return re.sub(r'[\s\n\r\u2028\u2029]+', ' ', s, flags=re.U).strip() def parse_unicode(text): From 94be2648a053358d2cb2838e0f71fe6a4b09d58f Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Mon, 9 May 2022 13:23:08 -0300 Subject: [PATCH 11/99] perf: index Submission table on submission_date This substantially speeds up submission rate threshold checks. --- ietf/submit/models.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ietf/submit/models.py b/ietf/submit/models.py index 6043d4ab7c..9f0e2fa337 100644 --- a/ietf/submit/models.py +++ b/ietf/submit/models.py @@ -63,6 +63,11 @@ class Submission(models.Model): def __str__(self): return "%s-%s" % (self.name, self.rev) + class Meta: + indexes = [ + models.Index(fields=['submission_date']), + ] + def submitter_parsed(self): return parse_email_line(self.submitter) From 1e065a45efed29aa70fbbdb290a0e02810fd063f Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Mon, 9 May 2022 13:27:22 -0300 Subject: [PATCH 12/99] feat: remove existing files when accepting a new submission After checking that a submission is not in progress, remove any files in staging that have the same name/rev with any extension. This should guard against stale files confusing the submission process if the usual cleanup fails or is skipped for some reason. 
--- ietf/submit/utils.py | 25 +++++++++++++++++++++---- ietf/submit/views.py | 7 +++++-- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index 1476750ae6..5f696d8d52 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -665,11 +665,22 @@ def move_files_to_repository(submission): elif ext in submission.file_types.split(','): raise ValueError("Intended to move '%s' to '%s', but found source and destination missing.") + +def remove_staging_files(name, rev, exts=None): + """Remove staging files corresponding to a submission + + exts is a list of extensions to be removed. If None, defaults to settings.IDSUBMIT_FILE_TYPES. + """ + if exts is None: + exts = [f'.{ext}' for ext in settings.IDSUBMIT_FILE_TYPES] + basename = pathlib.Path(settings.IDSUBMIT_STAGING_PATH) / f'{name}-{rev}' + for ext in exts: + basename.with_suffix(ext).unlink(missing_ok=True) + + def remove_submission_files(submission): - for ext in submission.file_types.split(','): - source = os.path.join(settings.IDSUBMIT_STAGING_PATH, '%s-%s%s' % (submission.name, submission.rev, ext)) - if os.path.exists(source): - os.unlink(source) + remove_staging_files(submission.name, submission.rev, submission.file_types.split(',')) + def approvable_submissions_for_user(user): if not user.is_authenticated: @@ -732,6 +743,12 @@ def expire_submission(submission, by): SubmissionEvent.objects.create(submission=submission, by=by, desc="Cancelled expired submission") + +def clear_existing_files(form): + """Make sure there are no leftover files from a previous submission""" + remove_staging_files(form.filename, form.revision) + + def save_files(form): file_name = {} for ext in list(form.fields.keys()): diff --git a/ietf/submit/views.py b/ietf/submit/views.py index dbb66fd94f..9db5f2766d 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -38,7 +38,7 @@ from ietf.submit.utils import ( approvable_submissions_for_user, preapprovals_for_user, 
recently_approved_by_user, validate_submission, create_submission_event, docevent_from_submission, post_submission, cancel_submission, rename_submission_files, remove_submission_files, get_draft_meta, - get_submission, fill_in_submission, apply_checkers, save_files, + get_submission, fill_in_submission, apply_checkers, save_files, clear_existing_files, check_submission_revision_consistency, accept_submission, accept_submission_requires_group_approval, accept_submission_requires_prev_auth_approval, update_submission_external_resources, remote_ip ) from ietf.stats.utils import clean_country_name @@ -53,6 +53,7 @@ def upload_submission(request): form = SubmissionManualUploadForm(request, data=request.POST, files=request.FILES) if form.is_valid(): log('got valid submission form for %s' % form.filename) + clear_existing_files(form) saved_files = save_files(form) authors, abstract, file_name, file_size = get_draft_meta(form, saved_files) @@ -147,7 +148,8 @@ def err(code, text): if not hasattr(user, 'person'): return err(400, "No person with username %s" % username) - saved_files = save_files(form) + clear_existing_files(form) + save_files(form) # todo sort out author parsing - this only works for xml drafts authors = form.authors @@ -242,6 +244,7 @@ def err(code, text): if not hasattr(user, 'person'): return err(400, "No person with username %s" % username) + clear_existing_files(form) saved_files = save_files(form) authors, abstract, file_name, file_size = get_draft_meta(form, saved_files) for a in authors: From 9dd7b5f4dfbceffbda79e0fef1308b44ed80e3b5 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 10 May 2022 12:08:12 -0300 Subject: [PATCH 13/99] refactor: make clear that deduce_group() uses only the draft name --- ietf/submit/forms.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index 120ed0933a..17fa45d15d 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -669,7 +669,7 
@@ def cleanup(): # called when context exited, even in case of exception if self.cleaned_data.get('txt') or self.cleaned_data.get('xml'): # check group - self.group = self.deduce_group() + self.group = self.deduce_group(self.filename) # check existing existing = Submission.objects.filter(name=self.filename, rev=self.revision).exclude(state__in=("posted", "cancel", "waiting-for-draft")) if existing: @@ -712,9 +712,9 @@ def check_submissions_thresholds(self, which, filter_kwargs, max_amount, max_siz if sum(s.file_size for s in submissions if s.file_size) > max_size * 1024 * 1024: raise forms.ValidationError("Max uploaded amount %s has been reached for today (maximum is %s MB)." % (which, max_size)) - def deduce_group(self): + @staticmethod + def deduce_group(name): """Figure out group from name or previously submitted draft, returns None if individual.""" - name = self.filename existing_draft = Document.objects.filter(name=name, type="draft") if existing_draft: group = existing_draft[0].group From bcfc85e9e7accd7378ee8536595184001c077304 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 10 May 2022 12:12:39 -0300 Subject: [PATCH 14/99] refactor: extract only draft name/revision in clean() method Minimizing the amount of validation done when accepting a file. The data extraction will be moved to asynchronous processing. --- ietf/submit/forms.py | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index 17fa45d15d..fd9d8d121a 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -566,6 +566,7 @@ def format_messages(where, e, log_msgs): if not self.errors: raise forms.ValidationError('Unexpected submission file types; found %s, but %s is required' % (', '.join(self.file_types), ' or '.join(self.base_formats))) + # Determine the draft name and revision. Try XML first. 
if self.cleaned_data.get('xml'): xml_file = self.cleaned_data.get('xml') tfn = None @@ -610,32 +611,17 @@ def cleanup(): # called when context exited, even in case of exception else: self.revision = None self.filename = draftname - self.authors = xml_draft.get_author_list() - self.title = xml_draft.get_title() - - # The following errors are likely noise if we have previous field - # errors: - if self.errors: - raise forms.ValidationError('') - - if self.cleaned_data.get('txt'): + elif self.cleaned_data.get('txt'): + # no XML available, extract from the text if we have it # try to parse it txt_file = self.cleaned_data['txt'] txt_file.seek(0) bytes = txt_file.read() - txt_file.seek(0) try: text = bytes.decode(self.file_info['txt'].charset) - # self.parsed_draft = PlaintextDraft(text, txt_file.name) - if self.filename == None: - self.filename = self.parsed_draft.filename - elif self.filename != self.parsed_draft.filename: - self.add_error('txt', "Inconsistent name information: xml:%s, txt:%s" % (self.filename, self.parsed_draft.filename)) - if self.revision == None: - self.revision = self.parsed_draft.revision - elif self.revision != self.parsed_draft.revision: - self.add_error('txt', "Inconsistent revision information: xml:%s, txt:%s" % (self.revision, self.parsed_draft.revision)) + self.filename = self.parsed_draft.filename + self.revision = self.parsed_draft.revision except (UnicodeDecodeError, LookupError) as e: self.add_error('txt', 'Failed decoding the uploaded file: "%s"' % str(e)) @@ -664,9 +650,6 @@ def cleanup(): # called when context exited, even in case of exception "element has a docName attribute which provides the full draft name including " "revision number.") - if not self.title: - raise forms.ValidationError("Could not extract a valid title from the upload") - if self.cleaned_data.get('txt') or self.cleaned_data.get('xml'): # check group self.group = self.deduce_group(self.filename) From 52c0864604148d68d9e9dc7e50fbfb3bc0779fdf Mon Sep 17 00:00:00 
2001 From: Jennifer Richards Date: Tue, 10 May 2022 12:28:19 -0300 Subject: [PATCH 15/99] refactor: minimize checks and data extraction in api_upload() view --- ietf/submit/views.py | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/ietf/submit/views.py b/ietf/submit/views.py index 9db5f2766d..6f062fe50c 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -151,30 +151,17 @@ def err(code, text): clear_existing_files(form) save_files(form) - # todo sort out author parsing - this only works for xml drafts - authors = form.authors - for a in authors: - if not a['email']: - raise ValidationError("Missing email address for author %s" % a) - submission = get_submission(form) - fill_in_submission(form, submission, authors, '', None) - create_submission_event(request, submission, desc="Uploaded unchecked submission") - - # must do this after validate_submission() or data needed for check may be invalid + submission.state = DraftSubmissionStateName.objects.get(slug="uploaded") + submission.remote_ip = form.remote_ip + submission.file_types = ','.join(form.file_types) + submission.submission_date = datetime.date.today() if check_submission_revision_consistency(submission): return err( 409, "Submission failed due to a document revision inconsistency error " "in the database. 
Please contact the secretariat for assistance.") - - author_emails = [a['email'].lower() for a in authors] - if not any( - email.address.lower() in author_emails - for email in user.person.email_set.filter(active=True) - ): - raise ValidationError('Submitter %s is not one of the document authors' % user.username) - submission.submitter = user.person.formatted_email() submission.save() + create_submission_event(request, submission, desc="Uploaded submission through API") from .tasks import check_and_accept_submission, render_missing_formats ( @@ -196,7 +183,6 @@ def err(code, text): except Exception as e: exception = e raise - return err(500, "Exception: %s" % str(e)) finally: if exception and submission: remove_submission_files(submission) From 553720c3d853454b95a6248821eb513afbd0f96a Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 12 May 2022 17:31:37 -0300 Subject: [PATCH 16/99] ci: fix dockerfiles to match sandbox testing --- docker-compose.yml | 1 + docker/docker-compose.celery.yml | 44 +++++++++++++++++++------------- ietf/settings.py | 2 +- 3 files changed, 28 insertions(+), 19 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index e0d8a6f182..cc4437a36b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -75,6 +75,7 @@ services: UPDATE_REQUIREMENTS: 1 command: - '--loglevel=INFO' + network_mode: 'service:mq' volumes: mariadb-data: diff --git a/docker/docker-compose.celery.yml b/docker/docker-compose.celery.yml index 97d7af67a5..dac674079e 100644 --- a/docker/docker-compose.celery.yml +++ b/docker/docker-compose.celery.yml @@ -1,26 +1,31 @@ -version: '3.8' +version: '2.4' +# Use version 2.4 for mem_limit setting. Version 3+ uses deploy.resources.limits.memory +# instead, but that only works for swarm with docker-compose 1.25.1. 
services: mq: image: rabbitmq:3-alpine - user: '${RABBITMQ_UID:?Must specify RABBITMQ_UID}' + user: '${RABBITMQ_UID:-499:499}' hostname: datatracker-mq - deploy: - resources: - limits: - memory: 1gb # coordinate with settings in rabbitmq.conf - reservations: - memory: 512mb +# deploy: +# resources: +# limits: +# memory: 1gb # coordinate with settings in rabbitmq.conf +# reservations: +# memory: 512mb + mem_limit: 1gb # coordinate with settings in rabbitmq.conf ports: - '${MQ_PORT:-5672}:5672' volumes: - - /var/lib/rabbitmq + - ./lib.rabbitmq:/var/lib/rabbitmq - ./rabbitmq.conf:/etc/rabbitmq/conf.d/90-ietf.conf - ./definitions.json:/ietf-conf/definitions.json restart: unless-stopped -# logging: -# driver: "syslog" -# options: + logging: + driver: "syslog" + options: + syslog-address: 'unixgram:///dev/log' + tag: 'docker/{{.Name}}' # syslog-address: "tcp://ietfa.amsl.com:514" celery: @@ -30,14 +35,17 @@ services: # UPDATE_REQUIREMENTS: 1 # uncomment to update Python requirements on startup command: - '--loglevel=INFO' - user: '${CELERY_UID:?Must specify CELERY_UID}' + user: '${CELERY_UID:-499:499}' volumes: - - ..:/workspace - - '${MYSQL_SOCKET_PATH:-/run/mysqld}:/run/mysqld' + - '${DATATRACKER_PATH:-..}:/workspace' + - '${MYSQL_SOCKET_PATH:-/run/mysql}:/run/mysql' depends_on: - mq + network_mode: 'service:mq' restart: unless-stopped -# logging: -# driver: "syslog" -# options: + logging: + driver: "syslog" + options: + syslog-address: 'unixgram:///dev/log' + tag: 'docker/{{.Name}}' # syslog-address: "tcp://ietfa.amsl.com:514" diff --git a/ietf/settings.py b/ietf/settings.py index a27ec23068..1fd9988394 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -1171,7 +1171,7 @@ def skip_unreadable_post(record): # Celery configuration CELERY_TIMEZONE = 'UTC' -CELERY_BROKER_URL = 'amqp://mq/' +CELERY_BROKER_URL = 'amqp:///' CELERY_ACKS_LATE = True # failed tasks will be retried; keep tasks idempotent or disable per-task From caae932671e00520737574c053ed82ac912b3341 Mon 
Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 17 May 2022 14:07:46 -0300 Subject: [PATCH 17/99] ci: tweak celery container docker-compose settings --- docker-compose.yml | 2 +- ietf/settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index cc4437a36b..1adc987a9b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -75,7 +75,7 @@ services: UPDATE_REQUIREMENTS: 1 command: - '--loglevel=INFO' - network_mode: 'service:mq' + restart: unless-stopped volumes: mariadb-data: diff --git a/ietf/settings.py b/ietf/settings.py index 1fd9988394..a27ec23068 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -1171,7 +1171,7 @@ def skip_unreadable_post(record): # Celery configuration CELERY_TIMEZONE = 'UTC' -CELERY_BROKER_URL = 'amqp:///' +CELERY_BROKER_URL = 'amqp://mq/' CELERY_ACKS_LATE = True # failed tasks will be retried; keep tasks idempotent or disable per-task From 517cf21ea4392903088a3e8f43e87121b7cd5d05 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 17 May 2022 14:13:02 -0300 Subject: [PATCH 18/99] refactor: clean up Draft parsing API and usage * remove get_draftname() from Draft api; set filename during init * further XMLDraft work - remember xml_version after parsing - extract filename/revision during init - comment out long broken get_abstract() method * adjust form clean() method to use changed API --- ietf/submit/forms.py | 16 ++-------------- ietf/utils/draft.py | 7 ------- ietf/utils/xmldraft.py | 41 ++++++++++++++++++++++++++++++++++------- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index fd9d8d121a..3fe07e2547 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -597,20 +597,8 @@ def cleanup(): # called when context exited, even in case of exception self.add_error('xml', msgs) # todo other error handling??? 
- draftname = xml_draft.get_draftname() - if draftname is None: - self.add_error('xml', "No docName attribute found in the xml root element") - name_error = validate_submission_name(draftname) - if name_error: - self.add_error('xml', name_error) # This is a critical and immediate failure - do not proceed with other validation. - else: - revmatch = re.search("-[0-9][0-9]$", draftname) - if revmatch: - self.revision = draftname[-2:] - self.filename = draftname[:-3] - else: - self.revision = None - self.filename = draftname + self.filename = xml_draft.filename + self.revision = xml_draft.revision elif self.cleaned_data.get('txt'): # no XML available, extract from the text if we have it # try to parse it diff --git a/ietf/utils/draft.py b/ietf/utils/draft.py index 0b53313c36..86a173d6c7 100755 --- a/ietf/utils/draft.py +++ b/ietf/utils/draft.py @@ -170,9 +170,6 @@ def get_authors_with_firm(self): def get_creation_date(self): raise NotImplementedError - def get_draftname(self): - raise NotImplementedError - def get_formal_languages(self): raise NotImplementedError @@ -376,10 +373,6 @@ def begpage(pages, page, newpage, line=None): _debug('pages: %s' % len(pages)) return stripped, pages - # ---------------------------------------------------------------------- - def get_draftname(self): - return self.filename - # ---------------------------------------------------------------------- def get_pagecount(self): if self._pagecount == None: diff --git a/ietf/utils/xmldraft.py b/ietf/utils/xmldraft.py index cc9d90f59d..b71c591865 100644 --- a/ietf/utils/xmldraft.py +++ b/ietf/utils/xmldraft.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- import io import os +import re import xml2rfc import debug # pyflakes: ignore @@ -26,11 +27,17 @@ def __init__(self, xml_file): """ super().__init__() # cast xml_file to str so, e.g., this will work with a Path - self.xmltree = self.parse_xml(str(xml_file)) + self.xmltree, self.xml_version = self.parse_xml(str(xml_file)) self.xmlroot = 
self.xmltree.getroot() + self.filename, self.revision = self._parse_docname() @staticmethod def parse_xml(filename): + """Parse XML draft + + Converts to xml2rfc v3 schema, then returns the root of the v3 tree and the original + xml version. + """ orig_write_out = xml2rfc.log.write_out orig_write_err = xml2rfc.log.write_err orig_xml_library = os.environ.get('XML_LIBRARY', None) @@ -60,7 +67,7 @@ def cleanup(): # called when context exited, even if there's an exception if xml_version == '2': v2v3 = xml2rfc.V2v3XmlWriter(tree) tree.tree = v2v3.convert2to3() - return tree + return tree, xml_version def _document_name(self, anchor): """Guess document name from reference anchor @@ -90,15 +97,35 @@ def _reference_section_name(self, section_elt): section_name = section_elt.get('title') # fall back to title if we have it return section_name - def get_draftname(self): - return self.xmlroot.attrib.get('docName') + def _parse_docname(self): + docname = self.xmlroot.attrib.get('docName') + revmatch = re.match( + r'^(?P.+?)(?:-(?P[0-9][0-9]))?$', + docname, + + ) + if revmatch is None: + raise ValueError('Unable to parse docName') + # If a group had no match it is None + return revmatch.group('filename'), revmatch.group('rev') def get_title(self): return self.xmlroot.findtext('front/title').strip() - def get_abstract(self): - abstract = self.xmlroot.findtext('front/abstract') - return abstract.strip() if abstract else '' + # todo fix the implementation of XMLDraft.get_abstract() + # + # This code was pulled from ietf.submit.forms where it existed for some time. + # It does not work, at least with modern xml2rfc. This assumes that the abstract + # is simply text in the front/abstract node, but the XML schema wraps the actual + # abstract text in elements (and allows
<dl>,
    <ol>, and <ul>
      as well). As a + # result, this method normally returns an empty string, which is later replaced by + # the abstract parsed from the rendered text. For now, I a commenting this out + # and making it explicit that the abstract always comes from the text format. + # + # def get_abstract(self): + # """Extract the abstract""" + # abstract = self.xmlroot.findtext('front/abstract') + # return abstract.strip() if abstract else '' def get_author_list(self): """Get detailed author list From f9cd8cfe7d9c98bbd9f5672ef2cb9c2fa091243d Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 17 May 2022 14:13:32 -0300 Subject: [PATCH 19/99] feat: flesh out async submission processing First basically working pass! --- ietf/submit/tasks.py | 4 +-- ietf/submit/utils.py | 84 ++++++++++++++++++++++++++++++++++++++++++-- ietf/submit/views.py | 5 ++- 3 files changed, 85 insertions(+), 8 deletions(-) diff --git a/ietf/submit/tasks.py b/ietf/submit/tasks.py index 00e25c1094..44b056d48e 100644 --- a/ietf/submit/tasks.py +++ b/ietf/submit/tasks.py @@ -55,9 +55,9 @@ def check_and_accept_submission(submission_id): @shared_task -def render_missing_formats(submission_id): +def process_uploaded_submission(submission_id): submission = Submission.objects.get(pk=submission_id) - utils.render_missing_formats(submission) + utils.process_uploaded_submission(submission) @shared_task(bind=True) diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index 5f696d8d52..57c18e017a 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -1,5 +1,5 @@ -# Copyright The IETF Trust 2011-2020, All Rights Reserved # -*- coding: utf-8 -*- +# Copyright The IETF Trust 2011-2020, All Rights Reserved import datetime @@ -11,6 +11,7 @@ import xml2rfc from typing import Optional # pyflakes:ignore +from unidecode import unidecode from django.conf import settings from django.core.exceptions import ValidationError @@ -44,7 +45,8 @@ from ietf.utils.accesstoken import generate_random_key from 
ietf.utils.draft import PlaintextDraft from ietf.utils.mail import is_valid_email -from ietf.utils.text import parse_unicode +from ietf.utils.text import parse_unicode, normalize_text +from ietf.utils.xmldraft import XMLDraft from ietf.person.name import unidecode_name @@ -859,7 +861,6 @@ def fill_in_submission(form, submission, authors, abstract, file_size): submission.xml_version = form.xml_version submission.submission_date = datetime.date.today() submission.replaces = "" - # todo think through whether to do this if form.parsed_draft is not None: submission.pages = form.parsed_draft.get_pagecount() submission.words = form.parsed_draft.get_wordcount() @@ -1125,3 +1126,80 @@ def remote_ip(request): else: remote_ip = request.META.get('REMOTE_ADDR', None) return remote_ip + + +def _normalize_title(title): + if isinstance(title, str): + title = unidecode(title) # replace unicode with best-match ascii + return normalize_text(title) # normalize whitespace + + +def process_submission_xml(submission): + """Validate and extract info from an uploaded submission""" + xml_path = staging_path(submission.name, submission.rev, '.xml') + xml_draft = XMLDraft(xml_path) + + if submission.name != xml_draft.filename: + raise ValueError('XML draft filename disagrees with submission filename') + if submission.rev != xml_draft.revision: + raise ValueError('XML draft revision disagrees with submission revision') + + authors = xml_draft.get_author_list() + for a in authors: + if not a['email']: + raise ValueError(f'Missing email address for author {a}') + + author_emails = [a['email'].lower() for a in authors] + submitter = get_person_from_name_email(**submission.submitter_parsed()) # the ** expands dict into kwargs + if not any( + email.address.lower() in author_emails + for email in submitter.email_set.filter(active=True) + ): + raise ValueError(f'Submitter ({submitter}) is not one of the document authors') + + # Fill in the submission data + submission.title = 
_normalize_title(xml_draft.get_title()) + if not submission.title: + raise ValueError('Could not extract a valid title from the XML') + submission.authors = [ + {key: auth[key] for key in ('name', 'email', 'affiliation', 'country')} + for auth in authors + ] + submission.xml_version = xml_draft.xml_version + submission.save() + + +def process_submission_text(submission): + # todo adapt for possibility that txt was uploaded (this assumes it came from xml) + text_path = staging_path(submission.name, submission.rev, '.txt') + text_draft = PlaintextDraft.from_file(text_path) + + if submission.name != text_draft.filename: + raise ValueError('Text draft filename disagrees with submission filename') + if submission.rev != text_draft.revision: + raise ValueError('Text draft revision disagrees with submission revision') + if not _normalize_title(text_draft.get_title()): + raise ValueError('Could not extract a valid title from the text') + + # extract authors? + + submission.abstract = text_draft.get_abstract() # todo allow for possibility of abstract from XML? 
+ submission.document_date = text_draft.get_creation_date() + submission.pages = text_draft.get_pagecount() + submission.words = text_draft.get_wordcount() + submission.first_two_pages = ''.join(text_draft.pages[:2]) + submission.file_size = os.stat(text_path).st_size + submission.save() + + submission.formal_languages.set( + FormalLanguageName.objects.filter( + slug__in=text_draft.get_formal_languages() + ) + ) + + +def process_uploaded_submission(submission): + process_submission_xml(submission) + render_missing_formats(submission) + process_submission_text(submission) + set_extresources_from_existing_draft(submission) diff --git a/ietf/submit/views.py b/ietf/submit/views.py index 6f062fe50c..fa5e10c9c1 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -34,7 +34,7 @@ from ietf.submit.mail import send_full_url, send_manual_post_request, add_submission_email, get_reply_to from ietf.submit.models import (Submission, Preapproval, SubmissionExtResource, DraftSubmissionStateName, SubmissionEmailEvent ) -from ietf.submit.tasks import poke +from ietf.submit.tasks import check_and_accept_submission, process_uploaded_submission, poke from ietf.submit.utils import ( approvable_submissions_for_user, preapprovals_for_user, recently_approved_by_user, validate_submission, create_submission_event, docevent_from_submission, post_submission, cancel_submission, rename_submission_files, remove_submission_files, get_draft_meta, @@ -163,9 +163,8 @@ def err(code, text): submission.save() create_submission_event(request, submission, desc="Uploaded submission through API") - from .tasks import check_and_accept_submission, render_missing_formats ( - render_missing_formats.si(submission.pk) + process_uploaded_submission.si(submission.pk) | check_and_accept_submission(submission.pk) ).delay() From 071e30fb05044e493f2ff3f035f026e2eb528578 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Tue, 17 May 2022 17:04:37 -0300 Subject: [PATCH 20/99] feat: add state name for 
submission being validated asynchronously --- ietf/name/fixtures/names.json | 15 +++++++ ...add_draftsubmissionstatename_validating.py | 40 +++++++++++++++++++ ietf/submit/tasks.py | 4 ++ ietf/submit/views.py | 2 +- 4 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 ietf/name/migrations/0041_add_draftsubmissionstatename_validating.py diff --git a/ietf/name/fixtures/names.json b/ietf/name/fixtures/names.json index c749f0a279..b0e0f62003 100644 --- a/ietf/name/fixtures/names.json +++ b/ietf/name/fixtures/names.json @@ -10405,6 +10405,21 @@ "model": "name.draftsubmissionstatename", "pk": "uploaded" }, + { + "fields": { + "desc": "Running validation checks on received submission", + "name": "Validating Submitted Draft", + "next_states": [ + "uploaded", + "manual", + "cancel" + ], + "order": 10, + "used": true + }, + "model": "name.draftsubmissionstatename", + "pk": "validating" + }, { "fields": { "desc": "", diff --git a/ietf/name/migrations/0041_add_draftsubmissionstatename_validating.py b/ietf/name/migrations/0041_add_draftsubmissionstatename_validating.py new file mode 100644 index 0000000000..6660655afa --- /dev/null +++ b/ietf/name/migrations/0041_add_draftsubmissionstatename_validating.py @@ -0,0 +1,40 @@ +# Generated by Django 2.2.28 on 2022-05-17 11:35 + +from django.db import migrations + + +def forward(apps, schema_editor): + DraftSubmissionStateName = apps.get_model('name', 'DraftSubmissionStateName') + new_state = DraftSubmissionStateName.objects.create( + slug='validating', + name='Validating Submitted Draft', + desc='Running validation checks on received submission', + used=True, + order=1 + DraftSubmissionStateName.objects.order_by('-order').first().order, + ) + new_state.next_states.set( + DraftSubmissionStateName.objects.filter( + slug__in=['cancel', 'manual', 'uploaded'], + ) + ) + + +def reverse(apps, schema_editor): + Submission = apps.get_model('submit', 'Submission') + # Any submissions in the state we are about to delete would be 
deleted. + # Remove these manually if you really mean to do this. + assert Submission.objects.filter(state__slug='validating').count() == 0 + DraftSubmissionStateName = apps.get_model('name', 'DraftSubmissionStateName') + DraftSubmissionStateName.objects.filter(slug='validating').delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ('name', '0040_remove_constraintname_editor_label'), + ('submit', '0001_initial'), # ensure Submission model exists + ] + + operations = [ + migrations.RunPython(forward, reverse) + ] diff --git a/ietf/submit/tasks.py b/ietf/submit/tasks.py index 44b056d48e..c01ab370ce 100644 --- a/ietf/submit/tasks.py +++ b/ietf/submit/tasks.py @@ -8,6 +8,7 @@ from django.conf import settings from django.utils.module_loading import import_string +from ietf.name.models import DraftSubmissionStateName from ietf.submit.models import Submission from ietf.submit import utils from ietf.utils import log @@ -36,6 +37,9 @@ def apply_checker(checker_path, submission_id): @shared_task def accept_submission(submission_id): submission = Submission.objects.get(pk=submission_id) + submission.state = DraftSubmissionStateName.objects.get(slug='uploaded') + utils.create_submission_event(None, submission, desc="Completed submission validation checks") + errors = [c.message for c in submission.checks.filter(passed__isnull=False) if not c.passed] if errors: # utils.remove_submission_files(submission) diff --git a/ietf/submit/views.py b/ietf/submit/views.py index fa5e10c9c1..f3fd7cb160 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -152,7 +152,7 @@ def err(code, text): save_files(form) submission = get_submission(form) - submission.state = DraftSubmissionStateName.objects.get(slug="uploaded") + submission.state = DraftSubmissionStateName.objects.get(slug="validating") submission.remote_ip = form.remote_ip submission.file_types = ','.join(form.file_types) submission.submission_date = datetime.date.today() From 
9d77822a20dba4c3f049cd260e0ac6cc1f6ae96a Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Wed, 18 May 2022 11:06:40 -0300 Subject: [PATCH 21/99] feat: cancel submissions that async processing can't handle --- ietf/submit/utils.py | 61 ++++++++++++++++++++++++++++++++------------ ietf/submit/views.py | 3 --- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index 57c18e017a..c477253376 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -202,7 +202,7 @@ def check_submission_revision_consistency(submission): return None -def create_submission_event(request, submission, desc): +def create_submission_event(request: Optional[HttpRequest], submission, desc): by = None if request and request.user.is_authenticated: try: @@ -1134,20 +1134,25 @@ def _normalize_title(title): return normalize_text(title) # normalize whitespace +class SubmissionValidationError(Exception): + """Error class for submission validation checks""" + pass + + def process_submission_xml(submission): """Validate and extract info from an uploaded submission""" xml_path = staging_path(submission.name, submission.rev, '.xml') xml_draft = XMLDraft(xml_path) if submission.name != xml_draft.filename: - raise ValueError('XML draft filename disagrees with submission filename') + raise SubmissionValidationError('XML draft filename disagrees with submission filename') if submission.rev != xml_draft.revision: - raise ValueError('XML draft revision disagrees with submission revision') + raise SubmissionValidationError('XML draft revision disagrees with submission revision') authors = xml_draft.get_author_list() for a in authors: if not a['email']: - raise ValueError(f'Missing email address for author {a}') + raise SubmissionValidationError(f'Missing email address for author {a}') author_emails = [a['email'].lower() for a in authors] submitter = get_person_from_name_email(**submission.submitter_parsed()) # the ** expands dict into kwargs @@ 
-1155,12 +1160,12 @@ def process_submission_xml(submission): email.address.lower() in author_emails for email in submitter.email_set.filter(active=True) ): - raise ValueError(f'Submitter ({submitter}) is not one of the document authors') + raise SubmissionValidationError(f'Submitter ({submitter}) is not one of the document authors') # Fill in the submission data submission.title = _normalize_title(xml_draft.get_title()) if not submission.title: - raise ValueError('Could not extract a valid title from the XML') + raise SubmissionValidationError('Could not extract a valid title from the XML') submission.authors = [ {key: auth[key] for key in ('name', 'email', 'affiliation', 'country')} for auth in authors @@ -1170,18 +1175,22 @@ def process_submission_xml(submission): def process_submission_text(submission): - # todo adapt for possibility that txt was uploaded (this assumes it came from xml) + """Validate/extract data from the text version of a submitted draft + + This assumes the draft was uploaded as XML and extracts data that is not + currently available directly from the XML. Additional processing, e.g. from + get_draft_meta(), would need to be added in order to support direct text + draft uploads. + """ text_path = staging_path(submission.name, submission.rev, '.txt') text_draft = PlaintextDraft.from_file(text_path) if submission.name != text_draft.filename: - raise ValueError('Text draft filename disagrees with submission filename') + raise SubmissionValidationError('Text draft filename disagrees with submission filename') if submission.rev != text_draft.revision: - raise ValueError('Text draft revision disagrees with submission revision') + raise SubmissionValidationError('Text draft revision disagrees with submission revision') if not _normalize_title(text_draft.get_title()): - raise ValueError('Could not extract a valid title from the text') - - # extract authors? 
+ raise SubmissionValidationError('Could not extract a valid title from the text') submission.abstract = text_draft.get_abstract() # todo allow for possibility of abstract from XML? submission.document_date = text_draft.get_creation_date() @@ -1199,7 +1208,27 @@ def process_submission_text(submission): def process_uploaded_submission(submission): - process_submission_xml(submission) - render_missing_formats(submission) - process_submission_text(submission) - set_extresources_from_existing_draft(submission) + def abort_submission(error_message): + remove_submission_files(submission) + submission.state_id = 'cancel' + submission.save() + create_submission_event(None, submission, f'Submission rejected: {error_message}') + + if submission.file_types != '.xml': + abort_submission('Only XML draft submissions can be processed.') + + try: + process_submission_xml(submission) + if check_submission_revision_consistency(submission): + raise SubmissionValidationError( + 'Document revision inconsistency error in the database. ' + 'Please contact the secretariat for assistance.' + ) + render_missing_formats(submission) + process_submission_text(submission) + set_extresources_from_existing_draft(submission) + except SubmissionValidationError as err: + abort_submission(str(err)) + except Exception as err: + abort_submission('An error occurred during validation. 
Please contact the secretariat for assistance.') + log.log(f'Exception while validating submission {submission.pk}: {err}') \ No newline at end of file diff --git a/ietf/submit/views.py b/ietf/submit/views.py index f3fd7cb160..e15d4d92e8 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -156,9 +156,6 @@ def err(code, text): submission.remote_ip = form.remote_ip submission.file_types = ','.join(form.file_types) submission.submission_date = datetime.date.today() - if check_submission_revision_consistency(submission): - return err( 409, "Submission failed due to a document revision inconsistency error " - "in the database. Please contact the secretariat for assistance.") submission.submitter = user.person.formatted_email() submission.save() create_submission_event(request, submission, desc="Uploaded submission through API") From 4a0fbcbed69b28e69fad5dd7e24289b207d981fe Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Wed, 18 May 2022 12:47:48 -0300 Subject: [PATCH 22/99] refactor: simplify/consolidate async tasks and improve error handling --- ietf/submit/tasks.py | 51 +------------------------------------------ ietf/submit/utils.py | 52 ++++++++++++++++++++++++++++---------------- ietf/submit/views.py | 13 +++++------ 3 files changed, 40 insertions(+), 76 deletions(-) diff --git a/ietf/submit/tasks.py b/ietf/submit/tasks.py index c01ab370ce..1f0fd1ee34 100644 --- a/ietf/submit/tasks.py +++ b/ietf/submit/tasks.py @@ -2,62 +2,13 @@ # # Celery task definitions # -from celery import chain, shared_task -from pathlib import Path +from celery import shared_task -from django.conf import settings -from django.utils.module_loading import import_string - -from ietf.name.models import DraftSubmissionStateName from ietf.submit.models import Submission from ietf.submit import utils from ietf.utils import log -@shared_task -def apply_checker(checker_path, submission_id): - try: - checker_class = import_string(checker_path) - except ImportError: - # todo fail - 
raise - submission = Submission.objects.get(pk=submission_id) - - basename = Path(settings.IDSUBMIT_STAGING_PATH) / f'{submission.name}-{submission.rev}' - utils.apply_checker( - checker_class(), - submission, - { - ext: basename.with_suffix(f'.{ext}') - for ext in ['xml', 'txt', 'html'] - } - ) - - -@shared_task -def accept_submission(submission_id): - submission = Submission.objects.get(pk=submission_id) - submission.state = DraftSubmissionStateName.objects.get(slug='uploaded') - utils.create_submission_event(None, submission, desc="Completed submission validation checks") - - errors = [c.message for c in submission.checks.filter(passed__isnull=False) if not c.passed] - if errors: - # utils.remove_submission_files(submission) - Submission.objects.filter(pk=submission_id).update(state_id='cancel') - return 'egad' - else: - utils.accept_submission(submission) - return 'yippie' - - -def check_and_accept_submission(submission_id): - checks = [ - apply_checker.si(checker_path, submission_id) - for checker_path in settings.IDSUBMIT_CHECKER_CLASSES - ] - return chain(*checks, accept_submission.si(submission_id)) - - @shared_task def process_uploaded_submission(submission_id): submission = Submission.objects.get(pk=submission_id) diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index c477253376..3447405e8a 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -917,19 +917,22 @@ def accept_submission_requires_group_approval(submission): class SubmissionError(Exception): + """Exception for errors during submission processing""" pass def staging_path(filename, revision, ext): + if len(ext) > 0 and ext[0] != '.': + ext = f'.{ext}' return pathlib.Path(settings.IDSUBMIT_STAGING_PATH) / f'{filename}-{revision}{ext}' def render_missing_formats(submission): """Generate txt and html formats from xml draft - todo allow for forms that have already been uploaded + If a txt file already exists, leaves it in place. Overwrites an existing html file + if there is one. 
""" - # todo check timestamps?? xml2rfc.log.write_out = io.StringIO() # open(os.devnull, "w") xml2rfc.log.write_err = io.StringIO() # open(os.devnull, "w") os.environ["XML_LIBRARY"] = settings.XML_LIBRARY @@ -1134,25 +1137,20 @@ def _normalize_title(title): return normalize_text(title) # normalize whitespace -class SubmissionValidationError(Exception): - """Error class for submission validation checks""" - pass - - def process_submission_xml(submission): """Validate and extract info from an uploaded submission""" xml_path = staging_path(submission.name, submission.rev, '.xml') xml_draft = XMLDraft(xml_path) if submission.name != xml_draft.filename: - raise SubmissionValidationError('XML draft filename disagrees with submission filename') + raise SubmissionError('XML draft filename disagrees with submission filename') if submission.rev != xml_draft.revision: - raise SubmissionValidationError('XML draft revision disagrees with submission revision') + raise SubmissionError('XML draft revision disagrees with submission revision') authors = xml_draft.get_author_list() for a in authors: if not a['email']: - raise SubmissionValidationError(f'Missing email address for author {a}') + raise SubmissionError(f'Missing email address for author {a}') author_emails = [a['email'].lower() for a in authors] submitter = get_person_from_name_email(**submission.submitter_parsed()) # the ** expands dict into kwargs @@ -1160,12 +1158,12 @@ def process_submission_xml(submission): email.address.lower() in author_emails for email in submitter.email_set.filter(active=True) ): - raise SubmissionValidationError(f'Submitter ({submitter}) is not one of the document authors') + raise SubmissionError(f'Submitter ({submitter}) is not one of the document authors') # Fill in the submission data submission.title = _normalize_title(xml_draft.get_title()) if not submission.title: - raise SubmissionValidationError('Could not extract a valid title from the XML') + raise SubmissionError('Could not extract 
a valid title from the XML') submission.authors = [ {key: auth[key] for key in ('name', 'email', 'affiliation', 'country')} for auth in authors @@ -1186,13 +1184,13 @@ def process_submission_text(submission): text_draft = PlaintextDraft.from_file(text_path) if submission.name != text_draft.filename: - raise SubmissionValidationError('Text draft filename disagrees with submission filename') + raise SubmissionError('Text draft filename disagrees with submission filename') if submission.rev != text_draft.revision: - raise SubmissionValidationError('Text draft revision disagrees with submission revision') + raise SubmissionError('Text draft revision disagrees with submission revision') if not _normalize_title(text_draft.get_title()): - raise SubmissionValidationError('Could not extract a valid title from the text') + raise SubmissionError('Could not extract a valid title from the text') - submission.abstract = text_draft.get_abstract() # todo allow for possibility of abstract from XML? + submission.abstract = text_draft.get_abstract() submission.document_date = text_draft.get_creation_date() submission.pages = text_draft.get_pagecount() submission.words = text_draft.get_wordcount() @@ -1220,15 +1218,31 @@ def abort_submission(error_message): try: process_submission_xml(submission) if check_submission_revision_consistency(submission): - raise SubmissionValidationError( + raise SubmissionError( 'Document revision inconsistency error in the database. ' 'Please contact the secretariat for assistance.' 
) render_missing_formats(submission) process_submission_text(submission) set_extresources_from_existing_draft(submission) - except SubmissionValidationError as err: + apply_checkers( + submission, + { + ext: staging_path(submission.name, submission.rev, ext) + for ext in ['xml', 'txt', 'html'] + } + ) + errors = [c.message for c in submission.checks.filter(passed__isnull=False) if not c.passed] + if len(errors) > 0: + raise SubmissionError('Checks failed: ' + ' / '.join(errors)) + except SubmissionError as err: abort_submission(str(err)) except Exception as err: abort_submission('An error occurred during validation. Please contact the secretariat for assistance.') - log.log(f'Exception while validating submission {submission.pk}: {err}') \ No newline at end of file + log.log(f'Exception while validating submission {submission.pk}: {err}') + + + submission.state = DraftSubmissionStateName.objects.get(slug='uploaded') + submission.save() + create_submission_event(None, submission, desc="Completed submission validation checks") + accept_submission(submission) diff --git a/ietf/submit/views.py b/ietf/submit/views.py index e15d4d92e8..25c1cf6091 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -11,7 +11,7 @@ from django.conf import settings from django.contrib import messages from django.contrib.auth.models import User -from django.db import DataError +from django.db import DataError, transaction from django.urls import reverse as urlreverse from django.core.exceptions import ValidationError from django.http import HttpResponseRedirect, Http404, HttpResponseForbidden, HttpResponse @@ -34,7 +34,7 @@ from ietf.submit.mail import send_full_url, send_manual_post_request, add_submission_email, get_reply_to from ietf.submit.models import (Submission, Preapproval, SubmissionExtResource, DraftSubmissionStateName, SubmissionEmailEvent ) -from ietf.submit.tasks import check_and_accept_submission, process_uploaded_submission, poke +from ietf.submit.tasks import 
process_uploaded_submission, poke from ietf.submit.utils import ( approvable_submissions_for_user, preapprovals_for_user, recently_approved_by_user, validate_submission, create_submission_event, docevent_from_submission, post_submission, cancel_submission, rename_submission_files, remove_submission_files, get_draft_meta, @@ -160,11 +160,10 @@ def err(code, text): submission.save() create_submission_event(request, submission, desc="Uploaded submission through API") - ( - process_uploaded_submission.si(submission.pk) - | check_and_accept_submission(submission.pk) - ).delay() - + # Wrap in on_commit so the delayed task cannot start until the view is done with the DB + transaction.on_commit( + lambda: process_uploaded_submission.delay(submission.pk) + ) return HttpResponse( f'Upload of {submission.name} OK, validation and acceptance pending', content_type="text/plain") From 21588cdc45f855105f0e6c436f1e9402721e5d07 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 19 May 2022 12:32:21 -0300 Subject: [PATCH 23/99] feat: add api_submission_status endpoint --- ietf/api/urls.py | 2 ++ ietf/submit/views.py | 15 ++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/ietf/api/urls.py b/ietf/api/urls.py index 65a51cd0f4..25942b8c55 100644 --- a/ietf/api/urls.py +++ b/ietf/api/urls.py @@ -40,6 +40,8 @@ url(r'^submit/?$', submit_views.api_submit), # Draft upload API url(r'^upload/?$', submit_views.api_upload), + # Draft submission state API + url(r'^submission/(?P[0-9]+)/status/?', submit_views.api_submission_status), # Datatracker version url(r'^version/?$', api_views.version), # Application authentication API key diff --git a/ietf/submit/views.py b/ietf/submit/views.py index 25c1cf6091..eb1ad25848 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -7,6 +7,7 @@ import datetime from typing import Optional, cast # pyflakes:ignore +from urllib.parse import urljoin from django.conf import settings from django.contrib import messages @@ 
-14,7 +15,7 @@ from django.db import DataError, transaction from django.urls import reverse as urlreverse from django.core.exceptions import ValidationError -from django.http import HttpResponseRedirect, Http404, HttpResponseForbidden, HttpResponse +from django.http import HttpResponseRedirect, Http404, HttpResponseForbidden, HttpResponse, JsonResponse from django.http import HttpRequest # pyflakes:ignore from django.shortcuts import get_object_or_404, redirect, render from django.views.decorators.csrf import csrf_exempt @@ -185,6 +186,18 @@ def err(code, text): else: return err(405, "Method not allowed") + +@csrf_exempt +def api_submission_status(request, submission_id): + submission = get_submission_or_404(submission_id) + return JsonResponse( + { + 'id': str(submission.pk), + 'state': submission.state.slug, + } + ) + + @csrf_exempt def api_submit(request): "Automated submission entrypoint" From 5dbb46d2c4ec55ac15d6d5acceeed12ef2a0c610 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 19 May 2022 12:34:49 -0300 Subject: [PATCH 24/99] refactor: return JSON from submission api endpoints --- ietf/submit/views.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/ietf/submit/views.py b/ietf/submit/views.py index eb1ad25848..e2417eaf24 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -111,8 +111,11 @@ def upload_submission(request): @csrf_exempt def api_upload(request): - def err(code, text): - return HttpResponse(text, status=code, content_type='text/plain') + def err(code, error, messages=None): + data = {'error': error} + if messages is not None: + data['messages'] = [messages] if isinstance(messages, str) else messages + return JsonResponse(data, status=code) if request.method == 'GET': return render(request, 'submit/api_submit_info.html') @@ -165,17 +168,25 @@ def err(code, text): transaction.on_commit( lambda: process_uploaded_submission.delay(submission.pk) ) - return HttpResponse( - f'Upload of 
{submission.name} OK, validation and acceptance pending', - content_type="text/plain") + return JsonResponse( + { + 'id': str(submission.pk), + 'name': submission.name, + 'rev': submission.rev, + 'status_url': urljoin( + settings.IDTRACKER_BASE_URL, + urlreverse(api_submission_status, kwargs={'submission_id': submission.pk}), + ), + } + ) else: raise ValidationError(form.errors) except IOError as e: exception = e - return err(500, "IO Error: %s" % str(e)) + return err(500, 'IO Error', str(e)) except ValidationError as e: exception = e - return err(400, "Validation Error: %s" % str(e)) + return err(400, 'Validation Error', e.messages) except Exception as e: exception = e raise From 25b2c47d4aa21f3d430ac69cbb8da5bd35e51e4b Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 19 May 2022 12:36:48 -0300 Subject: [PATCH 25/99] refactor: reuse cancel_submission method --- ietf/submit/utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index 3447405e8a..cba8911991 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -645,7 +645,6 @@ def update_authors(draft, submission): def cancel_submission(submission): submission.state = DraftSubmissionStateName.objects.get(slug="cancel") submission.save() - remove_submission_files(submission) def rename_submission_files(submission, prev_rev, new_rev): @@ -1207,9 +1206,7 @@ def process_submission_text(submission): def process_uploaded_submission(submission): def abort_submission(error_message): - remove_submission_files(submission) - submission.state_id = 'cancel' - submission.save() + cancel_submission(submission) create_submission_event(None, submission, f'Submission rejected: {error_message}') if submission.file_types != '.xml': From afbae64d37f34109fc2800d923cdc610928ab322 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 19 May 2022 12:37:27 -0300 Subject: [PATCH 26/99] refactor: clean up error reporting a bit --- 
ietf/submit/forms.py | 13 +++++++++++-- ietf/submit/utils.py | 5 +---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ietf/submit/forms.py b/ietf/submit/forms.py index 3fe07e2547..2ca9110e8d 100644 --- a/ietf/submit/forms.py +++ b/ietf/submit/forms.py @@ -15,10 +15,11 @@ from email.utils import formataddr from unidecode import unidecode +from urllib.parse import urljoin from django import forms from django.conf import settings -from django.utils.html import mark_safe # type:ignore +from django.utils.html import mark_safe, format_html # type:ignore from django.urls import reverse as urlreverse from django.utils.encoding import force_str @@ -644,7 +645,15 @@ def cleanup(): # called when context exited, even in case of exception # check existing existing = Submission.objects.filter(name=self.filename, rev=self.revision).exclude(state__in=("posted", "cancel", "waiting-for-draft")) if existing: - raise forms.ValidationError(mark_safe('A submission with same name and revision is currently being processed. Check the status here.' % urlreverse("ietf.submit.views.submission_status", kwargs={ 'submission_id': existing[0].pk }))) + raise forms.ValidationError( + format_html( + 'A submission with same name and revision is currently being processed. 
Check the status here.', + urljoin( + settings.IDTRACKER_BASE_URL, + urlreverse("ietf.submit.views.submission_status", kwargs={'submission_id': existing[0].pk}), + ) + ) + ) # cut-off if self.revision == '00' and self.in_first_cut_off: diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index cba8911991..ae2056260b 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -1232,11 +1232,8 @@ def abort_submission(error_message): errors = [c.message for c in submission.checks.filter(passed__isnull=False) if not c.passed] if len(errors) > 0: raise SubmissionError('Checks failed: ' + ' / '.join(errors)) - except SubmissionError as err: - abort_submission(str(err)) except Exception as err: - abort_submission('An error occurred during validation. Please contact the secretariat for assistance.') - log.log(f'Exception while validating submission {submission.pk}: {err}') + abort_submission(str(err)) submission.state = DraftSubmissionStateName.objects.get(slug='uploaded') From b23b016d8f7f6f64f725e1e305ee642ee479818e Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 19 May 2022 15:45:46 -0300 Subject: [PATCH 27/99] feat: guard against cancellation of a submission while validating Not bulletproof but should prevent --- ietf/submit/utils.py | 14 +++++++++----- ietf/submit/views.py | 5 ++++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index ae2056260b..a6c8e521da 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -1209,6 +1209,9 @@ def abort_submission(error_message): cancel_submission(submission) create_submission_event(None, submission, f'Submission rejected: {error_message}') + if submission.state_id != 'validating': + log.log(f'Submission {submission.pk} is not in "validating" state, skipping.') + if submission.file_types != '.xml': abort_submission('Only XML draft submissions can be processed.') @@ -1235,8 +1238,9 @@ def abort_submission(error_message): except Exception as err: 
abort_submission(str(err)) - - submission.state = DraftSubmissionStateName.objects.get(slug='uploaded') - submission.save() - create_submission_event(None, submission, desc="Completed submission validation checks") - accept_submission(submission) + # if we get here and are still "validating", accept the draft + if submission.state_id == 'validating': + submission.state = DraftSubmissionStateName.objects.get(slug='uploaded') + submission.save() + create_submission_event(None, submission, desc="Completed submission validation checks") + accept_submission(submission) diff --git a/ietf/submit/views.py b/ietf/submit/views.py index e2417eaf24..88b96d8b8e 100644 --- a/ietf/submit/views.py +++ b/ietf/submit/views.py @@ -348,7 +348,10 @@ def submission_status(request, submission_id, access_token=None): is_ad = area and area.has_role(request.user, "ad") can_edit = can_edit_submission(request.user, submission, access_token) and submission.state_id == "uploaded" - can_cancel = (key_matched or is_secretariat) and submission.state.next_states.filter(slug="cancel") + # disallow cancellation of 'validating' submissions until the async validation process is abortable + can_cancel = ((key_matched or is_secretariat) + and submission.state_id != 'validating' + and submission.state.next_states.filter(slug="cancel")) can_group_approve = (is_secretariat or is_ad or is_chair) and submission.state_id == "grp-appr" can_ad_approve = (is_secretariat or is_ad) and submission.state_id == "ad-appr" From 608af255d4a2b719929afa01b0e2c9b6889686e3 Mon Sep 17 00:00:00 2001 From: Jennifer Richards Date: Thu, 19 May 2022 15:47:21 -0300 Subject: [PATCH 28/99] feat: indicate that a submission is still being validated --- ietf/templates/submit/submission_status.html | 419 ++++++++++--------- 1 file changed, 215 insertions(+), 204 deletions(-) diff --git a/ietf/templates/submit/submission_status.html b/ietf/templates/submit/submission_status.html index 2e1f89ff10..ce70a2bea6 100644 --- 
a/ietf/templates/submit/submission_status.html +++ b/ietf/templates/submit/submission_status.html @@ -35,75 +35,77 @@

      Submission status: {{ submission.state.name }}

      Please fix errors in the form below.

      {% endif %} -

      Submission checks

      -

      - {% if passes_checks %} - Your draft has been verified to pass the submission checks. - {% else %} - Your draft has NOT been verified to pass the submission checks. - {% endif %} -

      - {% if submission.authors|length > 5 %} -

      - - This document has more than five authors listed, which is considered excessive - under normal circumstances. If you plan to request publication as an RFC, this - will require additional consideration by the stream manager (for example, the - IESG), and publication may be declined unless sufficient justification is - provided. See - RFC 7322, section 4.1.1 - for details. + {% if submission.state_id != 'validating' %} +

      Submission checks

      +

      + {% if passes_checks %} + Your draft has been verified to pass the submission checks. + {% else %} + Your draft has NOT been verified to pass the submission checks. + {% endif %}

      - {% endif %} - {% for check in submission.latest_checks %} - {% if check.errors %} -

      - The {{ check.checker }} returned {{ check.errors }} error{{ check.errors|pluralize }} - and {{ check.warnings }} warning{{ check.warnings|pluralize }}; click the button - below to see details. Please fix those, and resubmit. -

      - {% elif check.warnings %} -

      - The {{ check.checker }} returned {{ check.warnings }} warning{{ check.warnings|pluralize }}. + {% if submission.authors|length > 5 %} +

      + + This document has more than five authors listed, which is considered excessive + under normal circumstances. If you plan to request publication as an RFC, this + will require additional consideration by the stream manager (for example, the + IESG), and publication may be declined unless sufficient justification is + provided. See + RFC 7322, section 4.1.1 + for details.

      {% endif %} - {% endfor %} - {% for check in submission.latest_checks %} - {% if check.passed != None %} - -

    - It takes two parameters: + It takes the following parameters:

    • - user which is the user login + user which is the user login (required)
    • - xml, which is the submitted file + xml, which is the submitted file (required) +
    • +
    • + replaces, a comma-separated list of draft names replaced by this submission (optional)

    @@ -82,7 +85,7 @@

    Draft submission API instructions

    Here is an example of submitting a draft and polling its status through the API:

    -$ curl -s -F "user=user.name@example.com" -F "xml=@~/draft-user-example.xml" {% absurl 'ietf.submit.views.api_submission' %} | jq
    +$ curl -s -F "user=user.name@example.com" -F "xml=@~/draft-user-example.xml" -F "replaces=draft-user-replaced-draft"  {% absurl 'ietf.submit.views.api_submission' %} | jq
     {
       "id": "126375",
       "name": "draft-user-example",
    
    From 783fa119a0df466de69c0275ff9d226141cebfc7 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 30 Jun 2022 18:57:12 -0300
    Subject: [PATCH 69/99] fix: remove unused import
    
    ---
     ietf/utils/xmldraft.py | 1 -
     1 file changed, 1 deletion(-)
    
    diff --git a/ietf/utils/xmldraft.py b/ietf/utils/xmldraft.py
    index 3f3111c3dc..15bf745cc5 100644
    --- a/ietf/utils/xmldraft.py
    +++ b/ietf/utils/xmldraft.py
    @@ -1,7 +1,6 @@
     # Copyright The IETF Trust 2022, All Rights Reserved
     # -*- coding: utf-8 -*-
     import io
    -import os
     import re
     import xml2rfc
     
    
    From b682d7148593949d2c48abddea47d0b9f0b7ae2b Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 30 Jun 2022 19:45:08 -0300
    Subject: [PATCH 70/99] test: test "replaces" validation for submission API
    
    ---
     ietf/submit/tests.py | 110 ++++++++++++++++++++++++++++++++++++++++++-
     1 file changed, 108 insertions(+), 2 deletions(-)
    
    diff --git a/ietf/submit/tests.py b/ietf/submit/tests.py
    index f3cc5f97b8..bf6d1ae17d 100644
    --- a/ietf/submit/tests.py
    +++ b/ietf/submit/tests.py
    @@ -16,6 +16,7 @@
     from pathlib import Path
     
     from django.conf import settings
    +from django.core.files.uploadedfile import SimpleUploadedFile
     from django.db import transaction
     from django.forms import ValidationError
     from django.test import override_settings
    @@ -28,7 +29,8 @@
     from ietf.submit.utils import (expirable_submissions, expire_submission, find_submission_filenames,
                                    post_submission, validate_submission_name, validate_submission_rev,
                                    process_uploaded_submission, SubmissionError, process_submission_text)
    -from ietf.doc.factories import DocumentFactory, WgDraftFactory, IndividualDraftFactory, IndividualRfcFactory
    +from ietf.doc.factories import (DocumentFactory, WgDraftFactory, IndividualDraftFactory, IndividualRfcFactory,
    +                                ReviewFactory, WgRfcFactory)
     from ietf.doc.models import ( Document, DocAlias, DocEvent, State,
         BallotPositionDocEvent, DocumentAuthor, SubmissionDocEvent )
     from ietf.doc.utils import create_ballot_if_not_open, can_edit_docextresources, update_action_holders
    @@ -42,7 +44,7 @@
     from ietf.person.models import Person
     from ietf.person.factories import UserFactory, PersonFactory, EmailFactory
     from ietf.submit.factories import SubmissionFactory, SubmissionExtResourceFactory
    -from ietf.submit.forms import SubmissionBaseUploadForm
    +from ietf.submit.forms import SubmissionBaseUploadForm, SubmissionAutoUploadForm
     from ietf.submit.models import Submission, Preapproval, SubmissionExtResource
     from ietf.submit.mail import add_submission_email, process_response_email
     from ietf.submit.tasks import process_uploaded_submission_task
    @@ -2972,6 +2974,110 @@ def test_check_submission_thresholds(self):
                     max_size=1,  # megabytes
                 )
     
    +    def test_replaces_field(self):
    +        """test SubmissionAutoUploadForm replaces field"""
    +        request_factory = RequestFactory()
    +        WgDraftFactory(name='draft-somebody-test')
    +        existing_drafts = WgDraftFactory.create_batch(2)
    +        xml, auth = submission_file('draft-somebody-test-01', 'draft-somebody-test-01.xml', None, 'test_submission.xml')
    +        files_dict = {
    +                         'xml': SimpleUploadedFile('draft-somebody-test-01.xml', xml.read().encode('utf8'),
    +                                                   content_type='application/xml'),
    +        }
    +
    +        # no replaces
    +        form = SubmissionAutoUploadForm(
    +            request_factory.get('/some/url'),
    +            data={'user': auth.user.username, 'replaces': ''},
    +            files=files_dict,
    +        )
    +        self.assertTrue(form.is_valid())
    +        self.assertEqual(form.cleaned_data['replaces'], '')
    +
    +        # whitespace
    +        form = SubmissionAutoUploadForm(
    +            request_factory.get('/some/url'),
    +            data={'user': auth.user.username, 'replaces': '   '},
    +            files=files_dict,
    +        )
    +        self.assertTrue(form.is_valid())
    +        self.assertEqual(form.cleaned_data['replaces'], '')
    +
    +        # one replaces
    +        form = SubmissionAutoUploadForm(
    +            request_factory.get('/some/url'),
    +            data={'user': auth.user.username, 'replaces': existing_drafts[0].name},
    +            files=files_dict,
    +        )
    +        self.assertTrue(form.is_valid())
    +        self.assertEqual(form.cleaned_data['replaces'], existing_drafts[0].name)
    +
    +        # two replaces
    +        form = SubmissionAutoUploadForm(
    +            request_factory.get('/some/url'),
    +            data={'user': auth.user.username, 'replaces': f'{existing_drafts[0].name},{existing_drafts[1].name}'},
    +            files=files_dict,
    +        )
    +        self.assertTrue(form.is_valid())
    +        self.assertEqual(form.cleaned_data['replaces'], f'{existing_drafts[0].name},{existing_drafts[1].name}')
    +
    +        # two replaces, extra whitespace
    +        form = SubmissionAutoUploadForm(
    +            request_factory.get('/some/url'),
    +            data={'user': auth.user.username, 'replaces': f'   {existing_drafts[0].name} ,  {existing_drafts[1].name}'},
    +            files=files_dict,
    +        )
    +        self.assertTrue(form.is_valid())
    +        self.assertEqual(form.cleaned_data['replaces'], f'{existing_drafts[0].name},{existing_drafts[1].name}')
    +
    +        # can't replace self
    +        form = SubmissionAutoUploadForm(
    +            request_factory.get('/some/url'),
    +            data={'user': auth.user.username, 'replaces': 'draft-somebody-test'},
    +            files=files_dict,
    +        )
    +        self.assertFalse(form.is_valid())
    +        self.assertIn('A draft cannot replace itself', form.errors['replaces'])
    +
    +        # can't replace non-draft
    +        review = ReviewFactory()
    +        form = SubmissionAutoUploadForm(
    +            request_factory.get('/some/url'),
    +            data={'user': auth.user.username, 'replaces': review.name},
    +            files=files_dict,
    +        )
    +        self.assertFalse(form.is_valid())
    +        self.assertIn('A draft can only replace another draft', form.errors['replaces'])
    +
    +        # can't replace RFC
    +        rfc = WgRfcFactory()
    +        form = SubmissionAutoUploadForm(
    +            request_factory.get('/some/url'),
    +            data={'user': auth.user.username, 'replaces': rfc.name},
    +            files=files_dict,
    +        )
    +        self.assertFalse(form.is_valid())
    +        self.assertIn('A draft cannot replace an RFC', form.errors['replaces'])
    +
    +        # can't replace draft approved by iesg
    +        existing_drafts[0].set_state(State.objects.get(type='draft-iesg', slug='approved'))
    +        form = SubmissionAutoUploadForm(
    +            request_factory.get('/some/url'),
    +            data={'user': auth.user.username, 'replaces': existing_drafts[0].name},
    +            files=files_dict,
    +        )
    +        self.assertFalse(form.is_valid())
    +        self.assertIn(f'{existing_drafts[0].name} is approved by the IESG and cannot be replaced',
    +                      form.errors['replaces'])
    +
    +        # unknown draft
    +        form = SubmissionAutoUploadForm(
    +            request_factory.get('/some/url'),
    +            data={'user': auth.user.username, 'replaces': 'fake-name'},
    +            files=files_dict,
    +        )
    +        self.assertFalse(form.is_valid())
    +
     
     class AsyncSubmissionTests(BaseSubmitTestCase):
         """Tests of async submission-related tasks"""
    
    From 60a6ef6250f8b370a06de920b353fc9f35015c8c Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 30 Jun 2022 19:58:19 -0300
    Subject: [PATCH 71/99] test: test that "replaces" is set by api_submission
    
    ---
     ietf/submit/tests.py | 18 ++++++++++++++++++
     1 file changed, 18 insertions(+)
    
    diff --git a/ietf/submit/tests.py b/ietf/submit/tests.py
    index bf6d1ae17d..df6bd04594 100644
    --- a/ietf/submit/tests.py
    +++ b/ietf/submit/tests.py
    @@ -2771,6 +2771,24 @@ def test_upload_draft(self):
             self.assertEqual(submission.state_id, 'validating')
             self.assertIn('Uploaded submission through API', submission.submissionevent_set.last().desc)
     
    +    def test_upload_draft_with_replaces(self):
    +        """api_submission accepts a submission and queues it for processing"""
    +        existing_draft = WgDraftFactory()
    +        url = urlreverse('ietf.submit.views.api_submission')
    +        xml, author = submission_file('draft-somebody-test-00', 'draft-somebody-test-00.xml', None, 'test_submission.xml')
    +        data = {
    +            'xml': xml,
    +            'user': author.user.username,
    +            'replaces': existing_draft.name,
    +        }
    +        # mock out the task so we don't call to celery during testing!
    +        with mock.patch('ietf.submit.views.process_uploaded_submission_task'):
    +            r = self.client.post(url, data)
    +        self.assertEqual(r.status_code, 200)
    +        submission = Submission.objects.last()
    +        self.assertEqual(submission.name, 'draft-somebody-test')
    +        self.assertEqual(submission.replaces, existing_draft.name)
    +
         def test_rejects_broken_upload(self):
             """api_submission immediately rejects a submission with serious problems"""
             orig_submission_count = Submission.objects.count()
    
    From 621941217be52c22ab6affbbf56900a464837c24 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Tue, 5 Jul 2022 17:22:44 -0300
    Subject: [PATCH 72/99] feat: trap TERM to gracefully stop celery container
    
    ---
     dev/celery/docker-init.sh | 16 +++++++++++++++-
     1 file changed, 15 insertions(+), 1 deletion(-)
    
    diff --git a/dev/celery/docker-init.sh b/dev/celery/docker-init.sh
    index 4245cff814..6c613e9358 100755
    --- a/dev/celery/docker-init.sh
    +++ b/dev/celery/docker-init.sh
    @@ -9,4 +9,18 @@ if [[ -n "${UPDATE_REQUIREMENTS}" && -r requirements.txt ]]; then
       pip install --upgrade -r requirements.txt
     fi
     
    -celery --app="${CELERY_APP:-ietf}" worker "$@"
    +celery_pid=0
    +cleanup () {
    +  # Cleanly terminate the celery app by sending it a TERM, then waiting for it to exit.
    +  if [[ "${celery_pid}" != 0 ]]; then
    +    echo "Gracefully terminating celery worker. This may take a few minutes if tasks are in progress..."
    +    kill -TERM "${celery_pid}"
    +    wait "${celery_pid}"
    +  fi
    +}
    +
    +trap cleanup TERM
    +# start celery in the background so we can trap the TERM signal
    +celery --app="${CELERY_APP:-ietf}" worker "$@" &
    +celery_pid=$!
    +wait "${celery_pid}"
    
    From a31767b44b2c526cae15bdad18e4f2ebdbf353a6 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 7 Jul 2022 11:19:08 -0300
    Subject: [PATCH 73/99] chore: tweak celery/mq settings
    
    ---
     dev/celery/docker-init.sh | 5 ++---
     docker-compose.yml        | 4 ++++
     ietf/settings.py          | 2 --
     3 files changed, 6 insertions(+), 5 deletions(-)
    
    diff --git a/dev/celery/docker-init.sh b/dev/celery/docker-init.sh
    index 6c613e9358..c5d845e6b5 100755
    --- a/dev/celery/docker-init.sh
    +++ b/dev/celery/docker-init.sh
    @@ -9,17 +9,16 @@ if [[ -n "${UPDATE_REQUIREMENTS}" && -r requirements.txt ]]; then
       pip install --upgrade -r requirements.txt
     fi
     
    -celery_pid=0
     cleanup () {
       # Cleanly terminate the celery app by sending it a TERM, then waiting for it to exit.
    -  if [[ "${celery_pid}" != 0 ]]; then
    +  if [[ -n "${celery_pid}" ]]; then
         echo "Gracefully terminating celery worker. This may take a few minutes if tasks are in progress..."
         kill -TERM "${celery_pid}"
         wait "${celery_pid}"
       fi
     }
     
    -trap cleanup TERM
    +trap 'trap "" TERM; cleanup' TERM
     # start celery in the background so we can trap the TERM signal
     celery --app="${CELERY_APP:-ietf}" worker "$@" &
     celery_pid=$!
    diff --git a/docker-compose.yml b/docker-compose.yml
    index 848bbf2072..cce7b9969f 100644
    --- a/docker-compose.yml
    +++ b/docker-compose.yml
    @@ -16,6 +16,7 @@ services:
     
             depends_on:
                 - db
    +            - mq
     
             ipc: host
     
    @@ -68,7 +69,10 @@ services:
                 CELERY_APP: ietf
             command:
                 - '--loglevel=INFO'
    +        depends_on:
    +            - db
             restart: unless-stopped
    +        stop_grace_period: 1m
     
     volumes:
         mariadb-data:
    diff --git a/ietf/settings.py b/ietf/settings.py
    index 81800e0a5d..9338b81728 100644
    --- a/ietf/settings.py
    +++ b/ietf/settings.py
    @@ -1169,8 +1169,6 @@ def skip_unreadable_post(record):
     # Celery configuration
     CELERY_TIMEZONE = 'UTC'
     CELERY_BROKER_URL = 'amqp://mq/'
    -CELERY_ACKS_LATE = True  # tasks aborted due to worker failure will retry; keep tasks idempotent or disable per-task
    -# (CELERY_ACKS_LATE does not retry a task that fails, including due to a clean worker shutdown)
     
     # Meetecho API setup: Uncomment this and provide real credentials to enable
     # Meetecho conference creation for interim session requests
    
    From d2e3acd615d37c74c99a219ed6e86355d2ef5437 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 7 Jul 2022 12:51:34 -0300
    Subject: [PATCH 74/99] docs: update installation instructions
    
    ---
     dev/INSTALL | 41 +++++++++++++++++++++++++++++++++--------
     1 file changed, 33 insertions(+), 8 deletions(-)
    
    diff --git a/dev/INSTALL b/dev/INSTALL
    index 132e607f52..6da6cb53de 100644
    --- a/dev/INSTALL
    +++ b/dev/INSTALL
    @@ -36,31 +36,56 @@ General Instructions for Deployment of a New Release
     
      6. Run system checks (which patches the just installed modules)::
     
    -     ietf/manage.py check
    +      ietf/manage.py check
     
    - 7. Run migrations:  
    + 7. Switch to the docker directory and update async task docker image:
    +
    +      cd /a/docker/datatracker-cel
    +      docker image tag ghcr.io/ietf-tools/datatracker-celery:latest datatracker-celery-fallback
    +      docker-compose pull celery
    +
    + 8. Stop async task worker (this may take a few minutes if tasks are in progress):
    +
    +      docker-compose stop celery
    +      docker-compose rm celery
    +      cd -
    +
    + 9. Run migrations:
     
           ietf/manage.py migrate
     
           Take note if any migrations were executed.
      
    - 8. Back out one directory level, then re-point the 'web' symlink::
    + 10. Back out one directory level, then re-point the 'web' symlink::
     
           cd ..
           rm ./web; ln -s ${releasenumber} web
     
    - 9. Reload the datatracker service (it is no longer necessary to restart apache) ::
    + 11. Reload the datatracker service (it is no longer necessary to restart apache) ::
     
           exit # or CTRL-D, back to root level shell
           systemctl restart datatracker
     
    - 10. Verify operation: 
    + 12. Start async task worker:
    +
    +      cd /a/docker/datatracker-cel && bash startcommand && cd -
    +
    + 13. Verify operation:
     
           http://datatracker.ietf.org/
     
    - 11. If install failed and there were no migrations at step 7, revert web symlink and repeat the restart in step 9.
    -     If there were migrations at step 7, they will need to be reversed before the restart at step 9. If it's not obvious
    -     what to do to reverse the migrations, contact the dev team.
    + 14. If install failed and there were no migrations at step 9, revert web symlink and docker update and repeat the
    +     restart in steps 11 and 12. To revert the docker update:
    +
    +          cd /a/docker/datatracker-cel
    +          docker-compose stop celery
    +          docker-compose rm celery
    +          docker image rm ghcr.io/ietf-tools/datatracker-celery:latest
    +          docker image tag datatracker-celery-fallback ghcr.io/ietf-tools/datatracker-celery:latest
    +          cd -
    +
    +     If there were migrations at step 9, they will need to be reversed before the restart at step 11.
    +     If it's not obvious what to do to reverse the migrations, contact the dev team.
     
      
     Patching a Production Release
    
    From df10bc07629c33ce92082553358ca6f1149a8b8b Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 7 Jul 2022 13:03:11 -0300
    Subject: [PATCH 75/99] ci: adjust paths that trigger celery worker image 
     build
    
    ---
     .github/workflows/build-celery-worker.yml | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/.github/workflows/build-celery-worker.yml b/.github/workflows/build-celery-worker.yml
    index 24e83f323a..2ce078f218 100644
    --- a/.github/workflows/build-celery-worker.yml
    +++ b/.github/workflows/build-celery-worker.yml
    @@ -8,7 +8,7 @@ on:
           - 'jennifer/submit-async'
         paths:
           - 'requirements.txt'
    -      - 'dev/celery/Dockerfile'
    +      - 'dev/celery/**'
     
       workflow_dispatch: 
     
    
    From 7a7a575f623b2d307028f8176f9164c6c72ec5f2 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Wed, 13 Jul 2022 14:04:38 -0300
    Subject: [PATCH 76/99] ci: fix branches/repo names left over from dev
    
    ---
     .github/workflows/build-celery-worker.yml | 4 +---
     1 file changed, 1 insertion(+), 3 deletions(-)
    
    diff --git a/.github/workflows/build-celery-worker.yml b/.github/workflows/build-celery-worker.yml
    index 2ce078f218..86ed8ec87d 100644
    --- a/.github/workflows/build-celery-worker.yml
    +++ b/.github/workflows/build-celery-worker.yml
    @@ -4,8 +4,6 @@ on:
       push:
         branches:
           - 'main'
    -      - 'jennifer/celery'
    -      - 'jennifer/submit-async'
         paths:
           - 'requirements.txt'
           - 'dev/celery/**'
    @@ -42,5 +40,5 @@ jobs:
             file: dev/celery/Dockerfile
             platforms: linux/amd64,linux/arm64
             push: true
    -        tags: ghcr.io/painless-security/datatracker-celery:latest
    +        tags: ghcr.io/ietf-tools/datatracker-celery:latest
     
    
    From e99f2c321c3f2d2b52490726f364548371e377e0 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Wed, 13 Jul 2022 15:46:37 -0300
    Subject: [PATCH 77/99] ci: run manage.py check when initializing celery
     container
    
    Driver here is applying the patches. Starting the celery workers
    also invokes the check task, but this should cause a clearer failure
    if something fails.
    ---
     dev/celery/docker-init.sh | 3 +++
     1 file changed, 3 insertions(+)
    
    diff --git a/dev/celery/docker-init.sh b/dev/celery/docker-init.sh
    index c5d845e6b5..3728c9b633 100755
    --- a/dev/celery/docker-init.sh
    +++ b/dev/celery/docker-init.sh
    @@ -9,6 +9,9 @@ if [[ -n "${UPDATE_REQUIREMENTS}" && -r requirements.txt ]]; then
       pip install --upgrade -r requirements.txt
     fi
     
    +echo "Running initial checks..."
    +/usr/local/bin/python $WORKSPACEDIR/ietf/manage.py check --settings=settings_local
    +
     cleanup () {
       # Cleanly terminate the celery app by sending it a TERM, then waiting for it to exit.
       if [[ -n "${celery_pid}" ]]; then
    
    From dbdf092f7d043f94339bcee58d6e142bd53d2cc6 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Wed, 13 Jul 2022 18:39:27 -0300
    Subject: [PATCH 78/99] docs: revise INSTALL instructions
    
    ---
     dev/INSTALL | 20 +++++++++++++++-----
     1 file changed, 15 insertions(+), 5 deletions(-)
    
    diff --git a/dev/INSTALL b/dev/INSTALL
    index 6da6cb53de..b90c23e9ff 100644
    --- a/dev/INSTALL
    +++ b/dev/INSTALL
    @@ -44,14 +44,14 @@ General Instructions for Deployment of a New Release
           docker image tag ghcr.io/ietf-tools/datatracker-celery:latest datatracker-celery-fallback
           docker-compose pull celery
     
    - 8. Stop async task worker (this may take a few minutes if tasks are in progress):
    + 8. Stop and remove the async task container (this may take a few minutes if tasks are in progress):
     
           docker-compose stop celery
           docker-compose rm celery
    -      cd -
     
    - 9. Run migrations:
    + 9. Return to the release directory and run migrations:
     
    +      cd /a/www/ietf-datatracker/${releasenumber}
           ietf/manage.py migrate
     
           Take note if any migrations were executed.
    @@ -68,7 +68,8 @@ General Instructions for Deployment of a New Release
     
      12. Start async task worker:
     
    -      cd /a/docker/datatracker-cel && bash startcommand && cd -
    +      cd /a/docker/datatracker-cel
    +      bash startcommand
     
      13. Verify operation:
     
    @@ -120,8 +121,17 @@ The following process should be used:
      6. Edit ``.../ietf/__init__.py`` in the new patched release to indicate the patch
         version in the ``__patch__`` string.
     
    - 7. Change the 'web' symlink, reload etc. as described in
    + 7. Stop the async task container (this may take a few minutes if tasks are in progress):
    +
    +      cd /a/docker/datatracker-cel
    +      docker-compose stop celery
    +
    + 8. Change the 'web' symlink, reload etc. as described in
         `General Instructions for Deployment of a New Release`_.
     
    + 9. Start async task worker:
    +
    +      cd /a/docker/datatracker-cel
    +      bash startcommand
     
     
    
    From a048fff4099b1347c7c449803bd7976df0028134 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 14 Jul 2022 18:04:14 -0300
    Subject: [PATCH 79/99] ci: pass filename to pip update in celery container
    
    ---
     dev/celery/docker-init.sh | 16 ++++++++++++----
     docker-compose.yml        |  1 +
     2 files changed, 13 insertions(+), 4 deletions(-)
    
    diff --git a/dev/celery/docker-init.sh b/dev/celery/docker-init.sh
    index 3728c9b633..552e179cea 100755
    --- a/dev/celery/docker-init.sh
    +++ b/dev/celery/docker-init.sh
    @@ -1,12 +1,20 @@
     #!/bin/bash
    -
    +#
    +# Environment parameters:
    +#
    +#   CELERY_APP - name of application to pass to celery (defaults to ietf)
    +#
    +#   UPDATES_REQUIREMENTS_FROM - path, relative to /workspace mount, to a pip requirements
    +#       file that should be installed at container startup. Default is no package install/update.
    +#
     WORKSPACEDIR="/workspace"
     
     cd "$WORKSPACEDIR" || exit 255
     
    -if [[ -n "${UPDATE_REQUIREMENTS}" && -r requirements.txt ]]; then
    -  echo "Updating requirements..."
    -  pip install --upgrade -r requirements.txt
    +if [[ -n "${UPDATE_REQUIREMENTS_FROM}" ]]; then
    +  reqs_file="${WORKSPACEDIR}/${UPDATE_REQUIREMENTS_FROM}"
    +  echo "Updating requirements from ${reqs_file}..."
    +  pip install --upgrade -r "${reqs_file}"
     fi
     
     echo "Running initial checks..."
    diff --git a/docker-compose.yml b/docker-compose.yml
    index cce7b9969f..e73d72301d 100644
    --- a/docker-compose.yml
    +++ b/docker-compose.yml
    @@ -67,6 +67,7 @@ services:
             image: ghcr.io/ietf-tools/datatracker-celery:latest
             environment:
                 CELERY_APP: ietf
    +            UPDATE_REQUIREMENTS_FROM: requirements.txt
             command:
                 - '--loglevel=INFO'
             depends_on:
    
    From 22e82801421e7adf14801277fced3fa5b54301dc Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 14 Jul 2022 18:05:09 -0300
    Subject: [PATCH 80/99] docs: update INSTALL to include freezing pip versions
    
    Will be used to coordinate package versions with the celery
    container in production.
    ---
     dev/INSTALL | 1 +
     1 file changed, 1 insertion(+)
    
    diff --git a/dev/INSTALL b/dev/INSTALL
    index b90c23e9ff..1c90f079bf 100644
    --- a/dev/INSTALL
    +++ b/dev/INSTALL
    @@ -29,6 +29,7 @@ General Instructions for Deployment of a New Release
           python3.9 -mvenv env
           source env/bin/activate
           pip install -r requirements.txt
    +      pip freeze > frozen-requirements.txt
     
      5. Move static files into place for CDN (/a/www/www6s/lib/dt):
     
    
    From 18f234132929bb808d18a98f96cd0c2b6a503a5c Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Tue, 2 Aug 2022 18:30:34 -0300
    Subject: [PATCH 81/99] docs: add explanation of frozen-requirements.txt
    
    ---
     dev/INSTALL | 4 ++++
     1 file changed, 4 insertions(+)
    
    diff --git a/dev/INSTALL b/dev/INSTALL
    index 1c90f079bf..ed660125ed 100644
    --- a/dev/INSTALL
    +++ b/dev/INSTALL
    @@ -31,6 +31,10 @@ General Instructions for Deployment of a New Release
           pip install -r requirements.txt
           pip freeze > frozen-requirements.txt
     
    +    (The pip freeze command records the exact versions of the Python libraries that pip installed.
    +     This is used by the celery docker container to ensure it uses the same library versions as
    +     the datatracker service.)
    +
      5. Move static files into place for CDN (/a/www/www6s/lib/dt):
     
           ietf/manage.py collectstatic
    
    From a48f582858c9258a568fb335ce9ff2056330f8df Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Wed, 3 Aug 2022 14:38:22 -0300
    Subject: [PATCH 82/99] ci: build image for sandbox deployment
    
    ---
     .github/workflows/build-celery-worker.yml | 4 +++-
     1 file changed, 3 insertions(+), 1 deletion(-)
    
    diff --git a/.github/workflows/build-celery-worker.yml b/.github/workflows/build-celery-worker.yml
    index 86ed8ec87d..9fa5245fa1 100644
    --- a/.github/workflows/build-celery-worker.yml
    +++ b/.github/workflows/build-celery-worker.yml
    @@ -4,6 +4,7 @@ on:
       push:
         branches:
           - 'main'
    +      - 'jennifer/submit-async'
         paths:
           - 'requirements.txt'
           - 'dev/celery/**'
    @@ -40,5 +41,6 @@ jobs:
             file: dev/celery/Dockerfile
             platforms: linux/amd64,linux/arm64
             push: true
    -        tags: ghcr.io/ietf-tools/datatracker-celery:latest
    +#        tags: ghcr.io/ietf-tools/datatracker-celery:latest
    +        tags: ghcr.io/painless-security/datatracker-celery:latest
     
    
    From 2d43316c04897dde92d8c548a69a63e8c97878a6 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Wed, 3 Aug 2022 14:41:00 -0300
    Subject: [PATCH 83/99] ci: add additional build trigger path
    
    ---
     .github/workflows/build-celery-worker.yml | 1 +
     1 file changed, 1 insertion(+)
    
    diff --git a/.github/workflows/build-celery-worker.yml b/.github/workflows/build-celery-worker.yml
    index 9fa5245fa1..93f8c2a43d 100644
    --- a/.github/workflows/build-celery-worker.yml
    +++ b/.github/workflows/build-celery-worker.yml
    @@ -8,6 +8,7 @@ on:
         paths:
           - 'requirements.txt'
           - 'dev/celery/**'
    +      - '.github/workflows/build-celery-worker.yml'
     
       workflow_dispatch: 
     
    
    From 40f01a3d83db4893e0d0d9c1cfcb8e9797747215 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Wed, 3 Aug 2022 15:16:17 -0300
    Subject: [PATCH 84/99] docs: tweak INSTALL
    
    ---
     dev/INSTALL | 4 +++-
     1 file changed, 3 insertions(+), 1 deletion(-)
    
    diff --git a/dev/INSTALL b/dev/INSTALL
    index ed660125ed..f376e54c41 100644
    --- a/dev/INSTALL
    +++ b/dev/INSTALL
    @@ -49,7 +49,9 @@ General Instructions for Deployment of a New Release
           docker image tag ghcr.io/ietf-tools/datatracker-celery:latest datatracker-celery-fallback
           docker-compose pull celery
     
    - 8. Stop and remove the async task container (this may take a few minutes if tasks are in progress):
    + 8. Stop and remove the async task container:
    +    (Wait for these to finish cleanly - it may take up to about 10 minutes for the 'stop' command to
    +     complete if a long-running task is in progress.)
     
           docker-compose stop celery
           docker-compose rm celery
    
    From bec798c26f3cdb89d6887f60afcd4045ff2a099c Mon Sep 17 00:00:00 2001
    From: Robert Sparks 
    Date: Wed, 3 Aug 2022 13:22:12 -0500
    Subject: [PATCH 85/99] fix: change INSTALL process to stop datatracker before
     running migrations
    
    ---
     dev/INSTALL | 21 ++++++++++++++-------
     1 file changed, 14 insertions(+), 7 deletions(-)
    
    diff --git a/dev/INSTALL b/dev/INSTALL
    index f376e54c41..86a9502326 100644
    --- a/dev/INSTALL
    +++ b/dev/INSTALL
    @@ -56,33 +56,40 @@ General Instructions for Deployment of a New Release
           docker-compose stop celery
           docker-compose rm celery
     
    - 9. Return to the release directory and run migrations:
    + 9. Stop the datatracker 
    +    (consider doing this with a second shell at ietfa to avoid the exit and shift back to wwwrun)
    +
    +      exit
    +      sudo systemctl stop datatracker.socket datatracker.service
    +      sudo su - -s /bin/bash wwwrun
    +
    + 10. Return to the release directory and run migrations:
     
           cd /a/www/ietf-datatracker/${releasenumber}
           ietf/manage.py migrate
     
           Take note if any migrations were executed.
      
    - 10. Back out one directory level, then re-point the 'web' symlink::
    + 11. Back out one directory level, then re-point the 'web' symlink::
     
           cd ..
           rm ./web; ln -s ${releasenumber} web
     
    - 11. Reload the datatracker service (it is no longer necessary to restart apache) ::
    + 12. Start the datatracker service (it is no longer necessary to restart apache) ::
     
           exit # or CTRL-D, back to root level shell
    -      systemctl restart datatracker
    +      sudo systemctl start datatracker.service datatracker.socket
     
    - 12. Start async task worker:
    + 13. Start async task worker:
     
           cd /a/docker/datatracker-cel
           bash startcommand
     
    - 13. Verify operation:
    + 14. Verify operation:
     
           http://datatracker.ietf.org/
     
    - 14. If install failed and there were no migrations at step 9, revert web symlink and docker update and repeat the
    + 15. If install failed and there were no migrations at step 10, revert web symlink and docker update and repeat the
          restart in steps 11 and 12. To revert the docker update:
     
               cd /a/docker/datatracker-cel
    
    From ce947852500100409f231508c1d0233244de0795 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 4 Aug 2022 13:06:01 -0300
    Subject: [PATCH 86/99] chore: use ietf.settings for manage.py check in celery
     container
    
    ---
     dev/celery/docker-init.sh | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/dev/celery/docker-init.sh b/dev/celery/docker-init.sh
    index 552e179cea..63188a8eb9 100755
    --- a/dev/celery/docker-init.sh
    +++ b/dev/celery/docker-init.sh
    @@ -18,7 +18,7 @@ if [[ -n "${UPDATE_REQUIREMENTS_FROM}" ]]; then
     fi
     
     echo "Running initial checks..."
    -/usr/local/bin/python $WORKSPACEDIR/ietf/manage.py check --settings=settings_local
    +/usr/local/bin/python $WORKSPACEDIR/ietf/manage.py check
     
     cleanup () {
       # Cleanly terminate the celery app by sending it a TERM, then waiting for it to exit.
    
    From e74cbc5ab651b05c69f3173dfc69a8976f9021d4 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 4 Aug 2022 13:45:24 -0300
    Subject: [PATCH 87/99] chore: set uid/gid for celery worker
    
    ---
     dev/celery/docker-init.sh | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/dev/celery/docker-init.sh b/dev/celery/docker-init.sh
    index 63188a8eb9..889b075d9b 100755
    --- a/dev/celery/docker-init.sh
    +++ b/dev/celery/docker-init.sh
    @@ -31,6 +31,6 @@ cleanup () {
     
     trap 'trap "" TERM; cleanup' TERM
     # start celery in the background so we can trap the TERM signal
    -celery --app="${CELERY_APP:-ietf}" worker "$@" &
    +celery --app="${CELERY_APP:-ietf}" --uid="${CELERY_UID:-0}" --gid="${CELERY_GID:-0}" worker "$@" &
     celery_pid=$!
     wait "${celery_pid}"
    
    From cdac495ebece9bfc51641343586f08c241c050d3 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 4 Aug 2022 14:28:31 -0300
    Subject: [PATCH 88/99] chore: create user/group in celery container if needed
    
    ---
     dev/INSTALL               |  6 ++++--
     dev/celery/docker-init.sh | 27 ++++++++++++++++++++++++++-
     2 files changed, 30 insertions(+), 3 deletions(-)
    
    diff --git a/dev/INSTALL b/dev/INSTALL
    index 86a9502326..bbfc1791a2 100644
    --- a/dev/INSTALL
    +++ b/dev/INSTALL
    @@ -50,12 +50,14 @@ General Instructions for Deployment of a New Release
           docker-compose pull celery
     
      8. Stop and remove the async task container:
    -    (Wait for these to finish cleanly - it may take up to about 10 minutes for the 'stop' command to
    -     complete if a long-running task is in progress.)
    +    Wait for these to finish cleanly. It may take up to about 10 minutes for the 'stop' command to
    +    complete if a long-running task is in progress.
     
           docker-compose stop celery
           docker-compose rm celery
     
    +    (Answer 'y' when prompted to remove the container.)
    +
      9. Stop the datatracker 
         (consider doing this with a second shell at ietfa to avoid the exit and shift back to wwwrun)
     
    diff --git a/dev/celery/docker-init.sh b/dev/celery/docker-init.sh
    index 889b075d9b..fb959581c2 100755
    --- a/dev/celery/docker-init.sh
    +++ b/dev/celery/docker-init.sh
    @@ -4,6 +4,10 @@
     #
     #   CELERY_APP - name of application to pass to celery (defaults to ietf)
     #
    +#   CELERY_UID - numeric uid for the celery worker process
    +#
    +#   CELERY_GID - numeric gid for the celery worker process
    +#
     #   UPDATES_REQUIREMENTS_FROM - path, relative to /workspace mount, to a pip requirements
     #       file that should be installed at container startup. Default is no package install/update.
     #
    @@ -20,6 +24,27 @@ fi
     echo "Running initial checks..."
     /usr/local/bin/python $WORKSPACEDIR/ietf/manage.py check
     
    +if [[ -n "${CELERY_UID}" ]]; then
    +  # ensure that some group with the necessary GID exists in container
    +  if ! id "${CELERY_UID}" ; then
    +    adduser --system --uid "${CELERY_UID}" --no-create-home --disabled-login "celery-user-${CELERY_UID}"
    +  fi
    +  UID_OPT="--uid=${CELERY_UID}"
    +else
    +  UID_OPT=
    +fi
    +
    +if [[ -n "${CELERY_GID}" ]]; then
    +  # ensure that some group with the necessary GID exists in container
    +  if ! getent group "${CELERY_GID}" ; then
    +    addgroup --gid "${CELERY_GID}" "celery-group-${CELERY_GID}"
    +  fi
    +  GID_OPT="--gid=${CELERY_GID}"
    +else
    +  GID_OPT=
    +fi
    +
    +
     cleanup () {
       # Cleanly terminate the celery app by sending it a TERM, then waiting for it to exit.
       if [[ -n "${celery_pid}" ]]; then
    @@ -31,6 +56,6 @@ cleanup () {
     
     trap 'trap "" TERM; cleanup' TERM
     # start celery in the background so we can trap the TERM signal
    -celery --app="${CELERY_APP:-ietf}" --uid="${CELERY_UID:-0}" --gid="${CELERY_GID:-0}" worker "$@" &
    +celery --app="${CELERY_APP:-ietf}" worker "${UID_OPT}" ${GID_OPT} "$@" &
     celery_pid=$!
     wait "${celery_pid}"
    
    From c56168cd21a108c94bd17e14ff5725330de5ada5 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Thu, 4 Aug 2022 17:03:01 -0300
    Subject: [PATCH 89/99] chore: tweak docker compose/init so celery container
     works in dev
    
    ---
     dev/celery/docker-init.sh        | 11 ++++-------
     docker/docker-compose.extend.yml |  1 +
     2 files changed, 5 insertions(+), 7 deletions(-)
    
    diff --git a/dev/celery/docker-init.sh b/dev/celery/docker-init.sh
    index fb959581c2..b7b01feeb3 100755
    --- a/dev/celery/docker-init.sh
    +++ b/dev/celery/docker-init.sh
    @@ -24,14 +24,13 @@ fi
     echo "Running initial checks..."
     /usr/local/bin/python $WORKSPACEDIR/ietf/manage.py check
     
    +CELERY_WORKER_OPTS=()
     if [[ -n "${CELERY_UID}" ]]; then
       # ensure that some group with the necessary GID exists in container
       if ! id "${CELERY_UID}" ; then
         adduser --system --uid "${CELERY_UID}" --no-create-home --disabled-login "celery-user-${CELERY_UID}"
       fi
    -  UID_OPT="--uid=${CELERY_UID}"
    -else
    -  UID_OPT=
    +  CELERY_WORKER_OPTS+=("--uid=${CELERY_UID}")
     fi
     
     if [[ -n "${CELERY_GID}" ]]; then
    @@ -39,9 +38,7 @@ if [[ -n "${CELERY_GID}" ]]; then
       if ! getent group "${CELERY_GID}" ; then
         addgroup --gid "${CELERY_GID}" "celery-group-${CELERY_GID}"
       fi
    -  GID_OPT="--gid=${CELERY_GID}"
    -else
    -  GID_OPT=
    +  CELERY_WORKER_OPTS+=("--gid=${CELERY_GID}")
     fi
     
     
    @@ -56,6 +53,6 @@ cleanup () {
     
     trap 'trap "" TERM; cleanup' TERM
     # start celery in the background so we can trap the TERM signal
    -celery --app="${CELERY_APP:-ietf}" worker "${UID_OPT}" ${GID_OPT} "$@" &
    +celery --app="${CELERY_APP:-ietf}" worker "${CELERY_WORKER_OPTS[@]}" "$@" &
     celery_pid=$!
     wait "${celery_pid}"
    diff --git a/docker/docker-compose.extend.yml b/docker/docker-compose.extend.yml
    index 2fbb0e1c14..06e47bbb08 100644
    --- a/docker/docker-compose.extend.yml
    +++ b/docker/docker-compose.extend.yml
    @@ -18,3 +18,4 @@ services:
         celery:
             volumes:
                 - .:/workspace
    +            - app-assets:/assets
    
    From 26980e9973b11883ac3d3824741d7e68a5b4726d Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Fri, 5 Aug 2022 12:09:12 -0300
    Subject: [PATCH 90/99] ci: build mq docker image
    
    ---
     .github/workflows/build-mq-broker.yml | 46 +++++++++++++++++++++++++++
     dev/mq/Dockerfile                     | 17 ++++++++++
     dev/mq/definitions.json               | 30 +++++++++++++++++
     dev/mq/ietf-rabbitmq-server.bash      | 18 +++++++++++
     dev/mq/rabbitmq.conf                  | 18 +++++++++++
     5 files changed, 129 insertions(+)
     create mode 100644 .github/workflows/build-mq-broker.yml
     create mode 100644 dev/mq/Dockerfile
     create mode 100644 dev/mq/definitions.json
     create mode 100755 dev/mq/ietf-rabbitmq-server.bash
     create mode 100644 dev/mq/rabbitmq.conf
    
    diff --git a/.github/workflows/build-mq-broker.yml b/.github/workflows/build-mq-broker.yml
    new file mode 100644
    index 0000000000..8b77d6dfef
    --- /dev/null
    +++ b/.github/workflows/build-mq-broker.yml
    @@ -0,0 +1,46 @@
    +name: Build MQ Broker Docker Image
    +
    +on:
    +  push:
    +    branches:
    +      - 'main'
    +      - 'jennifer/submit-async'
    +    paths:
    +      - 'dev/mq/**'
    +      - '.github/workflows/build-mq-broker.yml'  # rebuild when this workflow file itself changes
    +
    +  workflow_dispatch: 
    +
    +jobs:
    +  publish:
    +    runs-on: ubuntu-latest
    +    permissions:
    +      contents: read
    +      packages: write
    +
    +    steps:
    +    - uses: actions/checkout@v2
    +
    +    - name: Set up QEMU
    +      uses: docker/setup-qemu-action@v2
    +    
    +    - name: Set up Docker Buildx
    +      uses: docker/setup-buildx-action@v2
    +
    +    - name: Login to GitHub Container Registry
    +      uses: docker/login-action@v2
    +      with:
    +        registry: ghcr.io
    +        username: ${{ github.actor }}
    +        password: ${{ secrets.GITHUB_TOKEN }}
    +
    +    - name: Docker Build & Push
    +      uses: docker/build-push-action@v3
    +      with:
    +        context: .
    +        file: dev/mq/Dockerfile
    +        platforms: linux/amd64,linux/arm64
    +        push: true
    +#        tags: ghcr.io/ietf-tools/datatracker-mq:latest
    +        tags: ghcr.io/painless-security/datatracker-mq:latest
    +
    diff --git a/dev/mq/Dockerfile b/dev/mq/Dockerfile
    new file mode 100644
    index 0000000000..e8871c30a9
    --- /dev/null
    +++ b/dev/mq/Dockerfile
    @@ -0,0 +1,17 @@
    +# Dockerfile for the RabbitMQ message broker image
    +#
    +FROM rabbitmq:3-alpine
    +LABEL maintainer="IETF Tools Team "
    +
    +# Copy the startup script, strip any CR line endings, and make it executable
    +COPY dev/mq/ietf-rabbitmq-server.bash /ietf-rabbitmq-server.bash
    +RUN sed -i 's/\r$//' /ietf-rabbitmq-server.bash && \
    +    chmod +x /ietf-rabbitmq-server.bash
    +
    +# Put the rabbitmq.conf in the conf.d so it runs after 10-defaults.conf.
    +# Can override this for an individual container by mounting additional
    +# config files in /etc/rabbitmq/conf.d.
    +COPY dev/mq/rabbitmq.conf /etc/rabbitmq/conf.d/20-ietf-config.conf
    +COPY dev/mq/definitions.json /definitions.json
    +
    +CMD ["/ietf-rabbitmq-server.bash"]
    diff --git a/dev/mq/definitions.json b/dev/mq/definitions.json
    new file mode 100644
    index 0000000000..60e4fdba07
    --- /dev/null
    +++ b/dev/mq/definitions.json
    @@ -0,0 +1,30 @@
    +{
    +  "permissions": [
    +    {
    +      "configure": ".*",
    +      "read": ".*",
    +      "user": "datatracker",
    +      "vhost": "dt",
    +      "write": ".*"
    +    }
    +  ],
    +  "users": [
    +    {
    +      "hashing_algorithm": "rabbit_password_hashing_sha256",
    +      "limits": {},
    +      "name": "datatracker",
    +      "password_hash": "",
    +      "tags": []
    +    }
    +  ],
    +  "vhosts": [
    +    {
    +      "limits": [],
    +      "metadata": {
    +        "description": "",
    +        "tags": []
    +      },
    +      "name": "dt"
    +    }
    +  ]
    +}
    diff --git a/dev/mq/ietf-rabbitmq-server.bash b/dev/mq/ietf-rabbitmq-server.bash
    new file mode 100755
    index 0000000000..145b13e631
    --- /dev/null
    +++ b/dev/mq/ietf-rabbitmq-server.bash
    @@ -0,0 +1,18 @@
    +#!/bin/bash -x
    +#
    +# Environment parameters:
    +#
    +#   CELERY_PASSWORD - password for the datatracker celery user
    +#
    +export RABBITMQ_PID_FILE=/var/run/rabbitmq.pid
    +
    +update_celery_password () {
    +  rabbitmqctl wait "${RABBITMQ_PID_FILE}" --timeout 300
    +  rabbitmqctl await_startup --timeout 300
    +  rabbitmqctl change_password datatracker <
    Date: Fri, 5 Aug 2022 13:39:57 -0300
    Subject: [PATCH 91/99] fix: move rabbitmq.pid to writeable location
    
    ---
     dev/mq/ietf-rabbitmq-server.bash | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/dev/mq/ietf-rabbitmq-server.bash b/dev/mq/ietf-rabbitmq-server.bash
    index 145b13e631..212753ddc6 100755
    --- a/dev/mq/ietf-rabbitmq-server.bash
    +++ b/dev/mq/ietf-rabbitmq-server.bash
    @@ -4,7 +4,7 @@
     #
     #   CELERY_PASSWORD - password for the datatracker celery user
     #
    -export RABBITMQ_PID_FILE=/var/run/rabbitmq.pid
    +export RABBITMQ_PID_FILE=/tmp/rabbitmq.pid
     
     update_celery_password () {
       rabbitmqctl wait "${RABBITMQ_PID_FILE}" --timeout 300
    
    From e7949d0513df5d7a96a030dc4b8ddafbfe27e728 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Fri, 5 Aug 2022 14:01:15 -0300
    Subject: [PATCH 92/99] fix: clear password when CELERY_PASSWORD is empty
    
    Setting to an empty password is really not a good plan!
    ---
     dev/INSTALL                      | 24 +++++++++++-------------
     dev/mq/ietf-rabbitmq-server.bash |  6 +++++-
     2 files changed, 16 insertions(+), 14 deletions(-)
    
    diff --git a/dev/INSTALL b/dev/INSTALL
    index bbfc1791a2..f422af0f22 100644
    --- a/dev/INSTALL
    +++ b/dev/INSTALL
    @@ -43,20 +43,18 @@ General Instructions for Deployment of a New Release
     
           ietf/manage.py check
     
    - 7. Switch to the docker directory and update async task docker image:
    + 7. Switch to the docker directory and update images:
     
           cd /a/docker/datatracker-cel
           docker image tag ghcr.io/ietf-tools/datatracker-celery:latest datatracker-celery-fallback
    -      docker-compose pull celery
    +      docker image tag ghcr.io/ietf-tools/datatracker-mq:latest datatracker-mq-fallback
    +      docker-compose pull
     
    - 8. Stop and remove the async task container:
    -    Wait for these to finish cleanly. It may take up to about 10 minutes for the 'stop' command to
    + 8. Stop and remove the async task containers:
    +    Wait for this to finish cleanly. It may take up to about 10 minutes for the 'down' command to
         complete if a long-running task is in progress.
     
    -      docker-compose stop celery
    -      docker-compose rm celery
    -
    -    (Answer 'y' when prompted to remove the container.)
    +      docker-compose down
     
      9. Stop the datatracker 
         (consider doing this with a second shell at ietfa to avoid the exit and shift back to wwwrun)
    @@ -82,7 +80,7 @@ General Instructions for Deployment of a New Release
           exit # or CTRL-D, back to root level shell
           sudo systemctl start datatracker.service datatracker.socket
     
    - 13. Start async task worker:
    + 13. Start async task worker and message broker:
     
           cd /a/docker/datatracker-cel
           bash startcommand
    @@ -95,13 +93,13 @@ General Instructions for Deployment of a New Release
          restart in steps 11 and 12. To revert the docker update:
     
               cd /a/docker/datatracker-cel
    -          docker-compose stop celery
    -          docker-compose rm celery
    -          docker image rm ghcr.io/ietf-tools/datatracker-celery:latest
    +          docker-compose down
    +          docker image rm ghcr.io/ietf-tools/datatracker-celery:latest ghcr.io/ietf-tools/datatracker-mq:latest
               docker image tag datatracker-celery-fallback ghcr.io/ietf-tools/datatracker-celery:latest
    +          docker image tag datatracker-mq-fallback ghcr.io/ietf-tools/datatracker-mq:latest
               cd -
     
    -     If there were migrations at step 7, they will need to be reversed before the restart at step 11.
    +     If there were migrations at step 10, they will need to be reversed before the restart at step 12.
          If it's not obvious what to do to reverse the migrations, contact the dev team.
     
      
    diff --git a/dev/mq/ietf-rabbitmq-server.bash b/dev/mq/ietf-rabbitmq-server.bash
    index 212753ddc6..56effba179 100755
    --- a/dev/mq/ietf-rabbitmq-server.bash
    +++ b/dev/mq/ietf-rabbitmq-server.bash
    @@ -9,9 +9,13 @@ export RABBITMQ_PID_FILE=/tmp/rabbitmq.pid
     update_celery_password () {
       rabbitmqctl wait "${RABBITMQ_PID_FILE}" --timeout 300
       rabbitmqctl await_startup --timeout 300
    -  rabbitmqctl change_password datatracker <
    Date: Thu, 18 Aug 2022 12:24:23 -0300
    Subject: [PATCH 93/99] chore: add shutdown debugging option to celery image
    
    ---
     dev/celery/docker-init.sh | 12 ++++++++++++
     1 file changed, 12 insertions(+)
    
    diff --git a/dev/celery/docker-init.sh b/dev/celery/docker-init.sh
    index b7b01feeb3..cc0b924bac 100755
    --- a/dev/celery/docker-init.sh
    +++ b/dev/celery/docker-init.sh
    @@ -11,6 +11,8 @@
     #   UPDATES_REQUIREMENTS_FROM - path, relative to /workspace mount, to a pip requirements
     #       file that should be installed at container startup. Default is no package install/update.
     #
    +#   DEBUG_TERM_TIMING - if non-empty, writes debug messages during shutdown after a TERM signal
    +#
     WORKSPACEDIR="/workspace"
     
     cd "$WORKSPACEDIR" || exit 255
    @@ -41,12 +43,22 @@ if [[ -n "${CELERY_GID}" ]]; then
       CELERY_WORKER_OPTS+=("--gid=${CELERY_GID}")
     fi
     
    +log_term_timing_msgs () {
    +  # Debug aid: print a UTC-timestamped progress line every half second, with no exit condition
    +  while :; do
    +    printf '%s\n' "Waiting for celery worker shutdown ($(date --utc --iso-8601=ns))"
    +    sleep 0.5s
    +  done
    +}
     
     cleanup () {
       # Cleanly terminate the celery app by sending it a TERM, then waiting for it to exit.
       if [[ -n "${celery_pid}" ]]; then
         echo "Gracefully terminating celery worker. This may take a few minutes if tasks are in progress..."
         kill -TERM "${celery_pid}"
    +    if [[ -n "${DEBUG_TERM_TIMING}" ]]; then
    +      log_term_timing_msgs &
    +    fi
         wait "${celery_pid}"
       fi
     }
    
    From 352cd2bcd91f0e73c180891d1a0844b07eedb88d Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Fri, 19 Aug 2022 14:59:20 -0300
    Subject: [PATCH 94/99] chore: add django-celery-beat package
    
    ---
     ietf/settings.py | 5 +++++
     requirements.txt | 1 +
     2 files changed, 6 insertions(+)
    
    diff --git a/ietf/settings.py b/ietf/settings.py
    index d2a6df831d..c23a34f231 100644
    --- a/ietf/settings.py
    +++ b/ietf/settings.py
    @@ -437,6 +437,7 @@ def skip_unreadable_post(record):
         'analytical',
         'django_vite',
         'django_bootstrap5',
    +    'django_celery_beat',
         'corsheaders',
         'django_markup',
         'django_password_strength',
    @@ -1179,6 +1180,10 @@ def skip_unreadable_post(record):
     # Celery configuration
     CELERY_TIMEZONE = 'UTC'
     CELERY_BROKER_URL = 'amqp://mq/'
    +CELERY_BEAT_SCHEDULER = 'django_celery_beat.schedulers:DatabaseScheduler'
    +CELERY_BEAT_SYNC_EVERY = 1  # update DB after every event
    +assert not USE_TZ, 'Drop DJANGO_CELERY_BEAT_TZ_AWARE setting once USE_TZ is True!'
    +DJANGO_CELERY_BEAT_TZ_AWARE = False
     
     # Meetecho API setup: Uncomment this and provide real credentials to enable
     # Meetecho conference creation for interim session requests
    diff --git a/requirements.txt b/requirements.txt
    index 25dfa31316..cd0d8a1168 100644
    --- a/requirements.txt
    +++ b/requirements.txt
    @@ -12,6 +12,7 @@ defusedxml>=0.7.1    # for TastyPie when using xml; not a declared dependency
     Django>=2.2.28,<3.0
     django-analytical>=3.1.0
     django-bootstrap5>=21.3
    +django-celery-beat>=2.3.0
     django-csp>=3.7
     django-cors-headers>=3.11.0
     django-debug-toolbar>=3.2.4
    
    From dbabe82180937d706c762523667af1eac296c83e Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Fri, 19 Aug 2022 16:33:28 -0300
    Subject: [PATCH 95/99] chore: run "celery beat" in datatracker-celery image
    
    ---
     dev/celery/docker-init.sh | 17 +++++++++++------
     docker-compose.yml        | 19 +++++++++++++++++++
     2 files changed, 30 insertions(+), 6 deletions(-)
    
    diff --git a/dev/celery/docker-init.sh b/dev/celery/docker-init.sh
    index cc0b924bac..9d00328ad0 100755
    --- a/dev/celery/docker-init.sh
    +++ b/dev/celery/docker-init.sh
    @@ -4,6 +4,8 @@
     #
     #   CELERY_APP - name of application to pass to celery (defaults to ietf)
     #
    +#   CELERY_ROLE - 'worker' or 'beat' (defaults to 'worker')
    +#
     #   CELERY_UID - numeric uid for the celery worker process
     #
     #   CELERY_GID - numeric gid for the celery worker process
    @@ -14,6 +16,7 @@
     #   DEBUG_TERM_TIMING - if non-empty, writes debug messages during shutdown after a TERM signal
     #
     WORKSPACEDIR="/workspace"
    +CELERY_ROLE="${CELERY_ROLE:-worker}"
     
     cd "$WORKSPACEDIR" || exit 255
     
    @@ -23,16 +26,18 @@ if [[ -n "${UPDATE_REQUIREMENTS_FROM}" ]]; then
       pip install --upgrade -r "${reqs_file}"
     fi
     
    -echo "Running initial checks..."
    -/usr/local/bin/python $WORKSPACEDIR/ietf/manage.py check
    +if [[ "${CELERY_ROLE}" == "worker" ]]; then
    +    echo "Running initial checks..."
    +    /usr/local/bin/python $WORKSPACEDIR/ietf/manage.py check
    +fi
     
    -CELERY_WORKER_OPTS=()
    +CELERY_OPTS=( "${CELERY_ROLE}" )
     if [[ -n "${CELERY_UID}" ]]; then
       # ensure that some group with the necessary GID exists in container
       if ! id "${CELERY_UID}" ; then
         adduser --system --uid "${CELERY_UID}" --no-create-home --disabled-login "celery-user-${CELERY_UID}"
       fi
    -  CELERY_WORKER_OPTS+=("--uid=${CELERY_UID}")
    +  CELERY_OPTS+=("--uid=${CELERY_UID}")
     fi
     
     if [[ -n "${CELERY_GID}" ]]; then
    @@ -40,7 +45,7 @@ if [[ -n "${CELERY_GID}" ]]; then
       if ! getent group "${CELERY_GID}" ; then
         addgroup --gid "${CELERY_GID}" "celery-group-${CELERY_GID}"
       fi
    -  CELERY_WORKER_OPTS+=("--gid=${CELERY_GID}")
    +  CELERY_OPTS+=("--gid=${CELERY_GID}")
     fi
     
     log_term_timing_msgs () {
    @@ -65,6 +70,6 @@ cleanup () {
     
     trap 'trap "" TERM; cleanup' TERM
     # start celery in the background so we can trap the TERM signal
    -celery --app="${CELERY_APP:-ietf}" worker "${CELERY_WORKER_OPTS[@]}" "$@" &
    +celery --app="${CELERY_APP:-ietf}" "${CELERY_OPTS[@]}" "$@" &
     celery_pid=$!
     wait "${celery_pid}"
    diff --git a/docker-compose.yml b/docker-compose.yml
    index 71f2821cb9..5dc40f1707 100644
    --- a/docker-compose.yml
    +++ b/docker-compose.yml
    @@ -67,6 +67,7 @@ services:
             image: ghcr.io/ietf-tools/datatracker-celery:latest
             environment:
                 CELERY_APP: ietf
    +            CELERY_ROLE: worker
                 UPDATE_REQUIREMENTS_FROM: requirements.txt
             command:
                 - '--loglevel=INFO'
    @@ -74,6 +75,24 @@ services:
                 - db
             restart: unless-stopped
             stop_grace_period: 1m
    +        volumes:
    +            - .:/workspace
    +            - app-assets:/assets
    +
    +    beat:
    +        image: datatracker-celery-test
    +        environment:
    +            CELERY_APP: ietf
    +            CELERY_ROLE: beat
    +            UPDATE_REQUIREMENTS_FROM: requirements.txt
    +        command:
    +            - '--loglevel=INFO'
    +        depends_on:
    +            - db
    +        restart: unless-stopped
    +        stop_grace_period: 1m
    +        volumes:
    +            - .:/workspace
     
     volumes:
         mariadb-data:
    
    From de863d9b69884ca748751d29b6dbc5eb0d5fdcd4 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Fri, 19 Aug 2022 17:40:27 -0300
    Subject: [PATCH 96/99] chore: fix docker image name
    
    ---
     docker-compose.yml | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/docker-compose.yml b/docker-compose.yml
    index 5dc40f1707..983a0de989 100644
    --- a/docker-compose.yml
    +++ b/docker-compose.yml
    @@ -80,7 +80,7 @@ services:
                 - app-assets:/assets
     
         beat:
    -        image: datatracker-celery-test
    +        image: ghcr.io/ietf-tools/datatracker-celery:latest
             environment:
                 CELERY_APP: ietf
                 CELERY_ROLE: beat
    
    From e96ddd2d1cf112949fa4c3950d8c5b793eac3a89 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Fri, 19 Aug 2022 18:35:16 -0300
    Subject: [PATCH 97/99] feat: add task to cancel stale submissions
    
    ---
     ietf/settings.py     |  2 ++
     ietf/submit/tasks.py | 23 ++++++++++++++++++++++-
     2 files changed, 24 insertions(+), 1 deletion(-)
    
    diff --git a/ietf/settings.py b/ietf/settings.py
    index c23a34f231..12975f061a 100644
    --- a/ietf/settings.py
    +++ b/ietf/settings.py
    @@ -844,6 +844,8 @@ def skip_unreadable_post(record):
     #    "ietf.submit.checkers.DraftYangvalidatorChecker",    
     )
     
    +# Max time to allow for validation before a submission is subject to cancellation
    +IDSUBMIT_MAX_VALIDATION_TIME = datetime.timedelta(minutes=20)
     
     IDSUBMIT_MANUAL_STAGING_DIR = '/tmp/'
     
    diff --git a/ietf/submit/tasks.py b/ietf/submit/tasks.py
    index e2c580cd9f..57eabb1fc8 100644
    --- a/ietf/submit/tasks.py
    +++ b/ietf/submit/tasks.py
    @@ -4,8 +4,12 @@
     #
     from celery import shared_task
     
    +from django.db.models import Min
    +from django.conf import settings
    +from django.utils import timezone
    +
     from ietf.submit.models import Submission
    -from ietf.submit.utils import process_uploaded_submission
    +from ietf.submit.utils import cancel_submission, create_submission_event, process_uploaded_submission
     from ietf.utils import log
     
     
    @@ -19,6 +23,23 @@ def process_uploaded_submission_task(submission_id):
             process_uploaded_submission(submission)
     
     
    +@shared_task
    +def cancel_stale_submissions():  # cancel submissions that have been 'validating' for too long
    +    now = timezone.now()  # one reference time for both the cutoff and the logged age
    +    stale_submissions = Submission.objects.filter(
    +        state_id='validating',  # only submissions still in the validating state
    +    ).annotate(
    +        submitted_at=Min('submissionevent__time'),  # time of the submission's earliest event
    +    ).filter(
    +        submitted_at__lt=now - settings.IDSUBMIT_MAX_VALIDATION_TIME,  # earliest event is past the limit
    +    )
    +    for subm in stale_submissions:
    +        age = now - subm.submitted_at
    +        log.log(f'Canceling stale submission (id={subm.id}, age={age})')
    +        cancel_submission(subm)
    +        create_submission_event(None, subm, f'Submission canceled: validation checks took too long')
    +
    +
     @shared_task(bind=True)
     def poke(self):
         log.log(f'Poked {self.name}, request id {self.request.id}')
    
    From e5f1ab5dc4d3f8f979a31e6aa24011e3fa83ee67 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Fri, 19 Aug 2022 18:50:57 -0300
    Subject: [PATCH 98/99] test: test the cancel_stale_submissions task
    
    ---
     ietf/submit/tests.py | 26 ++++++++++++++++++++++++--
     1 file changed, 24 insertions(+), 2 deletions(-)
    
    diff --git a/ietf/submit/tests.py b/ietf/submit/tests.py
    index df6bd04594..70baff4328 100644
    --- a/ietf/submit/tests.py
    +++ b/ietf/submit/tests.py
    @@ -22,8 +22,8 @@
     from django.test import override_settings
     from django.test.client import RequestFactory
     from django.urls import reverse as urlreverse
    +from django.utils import timezone
     from django.utils.encoding import force_str, force_text
    -
     import debug                            # pyflakes:ignore
     
     from ietf.submit.utils import (expirable_submissions, expire_submission, find_submission_filenames,
    @@ -47,7 +47,7 @@
     from ietf.submit.forms import SubmissionBaseUploadForm, SubmissionAutoUploadForm
     from ietf.submit.models import Submission, Preapproval, SubmissionExtResource
     from ietf.submit.mail import add_submission_email, process_response_email
    -from ietf.submit.tasks import process_uploaded_submission_task
    +from ietf.submit.tasks import cancel_stale_submissions, process_uploaded_submission_task
     from ietf.utils.accesstoken import generate_access_token
     from ietf.utils.mail import outbox, empty_outbox, get_payload_text
     from ietf.utils.models import VersionInfo
    @@ -3360,6 +3360,28 @@ def test_status_of_validating_submission(self):
             self.assertContains(r, s.name)
             self.assertContains(r, 'still being processed and validated', status_code=200)
     
    +    @override_settings(IDSUBMIT_MAX_VALIDATION_TIME=datetime.timedelta(minutes=30))
    +    def test_cancel_stale_submissions(self):
    +        recent = SubmissionFactory(state_id='validating')  # event 15 minutes old: inside the limit
    +        recent.submissionevent_set.create(
    +            desc='fake created event',
    +            time=timezone.now() - datetime.timedelta(minutes=15),
    +        )
    +        overdue = SubmissionFactory(state_id='validating')  # event one second past the limit
    +        overdue.submissionevent_set.create(
    +            desc='fake created event',
    +            time=timezone.now() - datetime.timedelta(minutes=30, seconds=1),
    +        )
    +
    +        cancel_stale_submissions()
    +        # The recent submission must be left alone: same state, no extra event.
    +        recent = Submission.objects.get(pk=recent.pk)
    +        self.assertEqual(recent.state_id, 'validating')
    +        self.assertEqual(recent.submissionevent_set.count(), 1)
    +        # The overdue submission must be canceled, with exactly one new event recorded.
    +        overdue = Submission.objects.get(pk=overdue.pk)
    +        self.assertEqual(overdue.state_id, 'cancel')
    +        self.assertEqual(overdue.submissionevent_set.count(), 2)
     
     
     class ApiSubmitTests(BaseSubmitTestCase):
    
    From f9da62cd36637838085ce277620a8c80ade95358 Mon Sep 17 00:00:00 2001
    From: Jennifer Richards 
    Date: Mon, 22 Aug 2022 12:25:23 -0300
    Subject: [PATCH 99/99] chore: make f-string with no interpolation a plain
     string
    
    ---
     ietf/submit/tasks.py | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/ietf/submit/tasks.py b/ietf/submit/tasks.py
    index 57eabb1fc8..21d4275b75 100644
    --- a/ietf/submit/tasks.py
    +++ b/ietf/submit/tasks.py
    @@ -37,7 +37,7 @@ def cancel_stale_submissions():
             age = now - subm.submitted_at
             log.log(f'Canceling stale submission (id={subm.id}, age={age})')
             cancel_submission(subm)
    -        create_submission_event(None, subm, f'Submission canceled: validation checks took too long')
    +        create_submission_event(None, subm, 'Submission canceled: validation checks took too long')
     
     
     @shared_task(bind=True)