Skip to content

Commit cf62b46

Browse files
Find references from submitted XML instead of rendering to text and parsing. Fixes ietf-tools#3342. Commit ready for merge.
- Legacy-Id: 19825
1 parent 5bf0638 commit cf62b46

13 files changed

Lines changed: 689 additions & 45 deletions

bin/add-old-drafts-from-archive.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from django.conf import settings
1616
from django.core.validators import validate_email, ValidationError
17-
from ietf.utils.draft import Draft
17+
from ietf.utils.draft import PlaintextDraft
1818
from ietf.submit.utils import update_authors
1919

2020
import debug # pyflakes:ignore
@@ -61,7 +61,7 @@
6161
except UnicodeDecodeError:
6262
text = raw.decode('latin1')
6363
try:
64-
draft = Draft(text, txt_file.name, name_from_source=True)
64+
draft = PlaintextDraft(text, txt_file.name, name_from_source=True)
6565
except Exception as e:
6666
print name, rev, "Can't parse", p,":",e
6767
continue

ietf/doc/tests_utils.py

Lines changed: 146 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,22 @@
11
# Copyright The IETF Trust 2020, All Rights Reserved
22
import datetime
3+
import debug # pyflakes:ignore
4+
5+
from unittest.mock import patch
36

47
from django.db import IntegrityError
58

69
from ietf.group.factories import GroupFactory, RoleFactory
710
from ietf.name.models import DocTagName
811
from ietf.person.factories import PersonFactory
9-
from ietf.utils.test_utils import TestCase
12+
from ietf.utils.test_utils import TestCase, name_of_file_containing
1013
from ietf.person.models import Person
11-
from ietf.doc.factories import DocumentFactory, WgRfcFactory
14+
from ietf.doc.factories import DocumentFactory, WgRfcFactory, WgDraftFactory
1215
from ietf.doc.models import State, DocumentActionHolder, DocumentAuthor, Document
13-
from ietf.doc.utils import update_action_holders, add_state_change_event, update_documentauthors, fuzzy_find_documents
16+
from ietf.doc.utils import (update_action_holders, add_state_change_event, update_documentauthors,
17+
fuzzy_find_documents, rebuild_reference_relations)
18+
from ietf.utils.draft import Draft, PlaintextDraft
19+
from ietf.utils.xmldraft import XMLDraft
1420

1521

1622
class ActionHoldersTests(TestCase):
@@ -285,3 +291,140 @@ def test_fuzzy_find_documents(self):
285291
self.do_fuzzy_find_documents_rfc_test('draft-name-with-number-01')
286292
self.do_fuzzy_find_documents_rfc_test('draft-name-that-has-two-02-04')
287293
self.do_fuzzy_find_documents_rfc_test('draft-wild-01-numbers-0312')
294+
295+
296+
class RebuildReferenceRelationsTests(TestCase):
297+
def setUp(self):
298+
super().setUp()
299+
self.doc = WgDraftFactory() # document under test
300+
# Other documents that should be found by rebuild_reference_relations
301+
self.normative, self.informative, self.unknown = WgRfcFactory.create_batch(3)
302+
for relationship in ['refnorm', 'refinfo', 'refunk', 'refold']:
303+
self.doc.relateddocument_set.create(
304+
target=WgRfcFactory().docalias.first(),
305+
relationship_id=relationship,
306+
)
307+
self.updated = WgRfcFactory() # related document that should be left alone
308+
self.doc.relateddocument_set.create(target=self.updated.docalias.first(), relationship_id='updates')
309+
self.assertCountEqual(self.doc.relateddocument_set.values_list('relationship__slug', flat=True),
310+
['refnorm', 'refinfo', 'refold', 'refunk', 'updates'],
311+
'Test conditions set up incorrectly: wrong prior document relationships')
312+
for other_doc in [self.normative, self.informative, self.unknown]:
313+
self.assertEqual(
314+
self.doc.relateddocument_set.filter(target__name=other_doc.canonical_name()).count(),
315+
0,
316+
'Test conditions set up incorrectly: new documents already related',
317+
)
318+
319+
def _get_refs_return_value(self):
320+
return {
321+
self.normative.canonical_name(): Draft.REF_TYPE_NORMATIVE,
322+
self.informative.canonical_name(): Draft.REF_TYPE_INFORMATIVE,
323+
self.unknown.canonical_name(): Draft.REF_TYPE_UNKNOWN,
324+
'draft-not-found': Draft.REF_TYPE_NORMATIVE,
325+
}
326+
327+
def test_requires_txt_or_xml(self):
328+
result = rebuild_reference_relations(self.doc, {})
329+
self.assertCountEqual(result.keys(), ['errors'])
330+
self.assertEqual(len(result['errors']), 1)
331+
self.assertIn('No draft text available', result['errors'][0],
332+
'Error should be reported if no draft file is given')
333+
334+
result = rebuild_reference_relations(self.doc, {'md': 'cant-do-this.md'})
335+
self.assertCountEqual(result.keys(), ['errors'])
336+
self.assertEqual(len(result['errors']), 1)
337+
self.assertIn('No draft text available', result['errors'][0],
338+
'Error should be reported if no XML or plaintext file is given')
339+
340+
@patch.object(XMLDraft, 'get_refs')
341+
@patch.object(XMLDraft, '__init__', return_value=None)
342+
def test_xml(self, mock_init, mock_get_refs):
343+
"""Should build reference relations with only XML"""
344+
mock_get_refs.return_value = self._get_refs_return_value()
345+
346+
result = rebuild_reference_relations(self.doc, {'xml': 'file.xml'})
347+
348+
# if the method of calling the XMLDraft() constructor changes, this will need to be updated
349+
xmldraft_init_args, _ = mock_init.call_args
350+
self.assertEqual(xmldraft_init_args, ('file.xml',), 'XMLDraft initialized with unexpected arguments')
351+
self.assertEqual(
352+
result,
353+
{
354+
'warnings': ['There were 1 references with no matching DocAlias'],
355+
'unfound': ['draft-not-found'],
356+
}
357+
)
358+
359+
self.assertCountEqual(
360+
self.doc.relateddocument_set.values_list('target__name', 'relationship__slug'),
361+
[
362+
(self.normative.canonical_name(), 'refnorm'),
363+
(self.informative.canonical_name(), 'refinfo'),
364+
(self.unknown.canonical_name(), 'refunk'),
365+
(self.updated.docalias.first().name, 'updates'),
366+
]
367+
)
368+
369+
@patch.object(PlaintextDraft, 'get_refs')
370+
@patch.object(PlaintextDraft, '__init__', return_value=None)
371+
def test_plaintext(self, mock_init, mock_get_refs):
372+
"""Should build reference relations with only plaintext"""
373+
mock_get_refs.return_value = self._get_refs_return_value()
374+
375+
with name_of_file_containing('contents') as temp_file_name:
376+
result = rebuild_reference_relations(self.doc, {'txt': temp_file_name})
377+
378+
# if the method of calling the PlaintextDraft() constructor changes, this test will need to be updated
379+
_, mock_init_kwargs = mock_init.call_args
380+
self.assertEqual(mock_init_kwargs, {'text': 'contents', 'source': temp_file_name},
381+
'PlaintextDraft initialized with unexpected arguments')
382+
self.assertEqual(
383+
result,
384+
{
385+
'warnings': ['There were 1 references with no matching DocAlias'],
386+
'unfound': ['draft-not-found'],
387+
}
388+
)
389+
390+
self.assertCountEqual(
391+
self.doc.relateddocument_set.values_list('target__name', 'relationship__slug'),
392+
[
393+
(self.normative.canonical_name(), 'refnorm'),
394+
(self.informative.canonical_name(), 'refinfo'),
395+
(self.unknown.canonical_name(), 'refunk'),
396+
(self.updated.docalias.first().name, 'updates'),
397+
]
398+
)
399+
400+
@patch.object(PlaintextDraft, '__init__')
401+
@patch.object(XMLDraft, 'get_refs')
402+
@patch.object(XMLDraft, '__init__', return_value=None)
403+
def test_xml_and_plaintext(self, mock_init, mock_get_refs, mock_plaintext_init):
404+
"""Should build reference relations with XML when plaintext also available"""
405+
mock_get_refs.return_value = self._get_refs_return_value()
406+
407+
result = rebuild_reference_relations(self.doc, {'txt': 'file.txt', 'xml': 'file.xml'})
408+
409+
self.assertFalse(mock_plaintext_init.called, 'PlaintextDraft should not be used when XML is available')
410+
411+
# if the method of calling the XMLDraft() constructor changes, this will need to be updated
412+
xmldraft_init_args, _ = mock_init.call_args
413+
self.assertEqual(xmldraft_init_args, ('file.xml',), 'XMLDraft initialized with unexpected arguments')
414+
self.assertEqual(
415+
result,
416+
{
417+
'warnings': ['There were 1 references with no matching DocAlias'],
418+
'unfound': ['draft-not-found'],
419+
}
420+
)
421+
422+
self.assertCountEqual(
423+
self.doc.relateddocument_set.values_list('target__name', 'relationship__slug'),
424+
[
425+
(self.normative.canonical_name(), 'refnorm'),
426+
(self.informative.canonical_name(), 'refinfo'),
427+
(self.unknown.canonical_name(), 'refunk'),
428+
(self.updated.docalias.first().name, 'updates'),
429+
]
430+
)

ietf/doc/utils.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@
3939
from ietf.utils.mail import send_mail
4040
from ietf.mailtrigger.utils import gather_address_lists
4141
from ietf.utils import log
42+
from ietf.utils.xmldraft import XMLDraft
43+
4244

4345
def save_document_in_history(doc):
4446
"""Save a snapshot of document and related objects in the database."""
@@ -742,28 +744,33 @@ def update_telechat(request, doc, by, new_telechat_date, new_returning_item=None
742744

743745
return e
744746

745-
def rebuild_reference_relations(doc,filename=None):
747+
def rebuild_reference_relations(doc, filenames):
748+
"""Rebuild reference relations for a document
749+
750+
filenames should be a dict mapping file ext (i.e., type) to the full path of each file.
751+
"""
746752
if doc.type.slug != 'draft':
747753
return None
748754

749-
if not filename:
750-
if doc.get_state_slug() == 'rfc':
751-
filename=os.path.join(settings.RFC_PATH,doc.canonical_name()+".txt")
752-
else:
753-
filename=os.path.join(settings.INTERNET_DRAFT_PATH,doc.filename_with_rev())
754-
755-
try:
756-
with io.open(filename, 'rb') as file:
757-
refs = draft.Draft(file.read().decode('utf8'), filename).get_refs()
758-
except IOError as e:
759-
return { 'errors': ["%s :%s" % (e.strerror, filename)] }
755+
# try XML first
756+
if 'xml' in filenames:
757+
refs = XMLDraft(filenames['xml']).get_refs()
758+
elif 'txt' in filenames:
759+
filename = filenames['txt']
760+
try:
761+
refs = draft.PlaintextDraft.from_file(filename).get_refs()
762+
except IOError as e:
763+
return { 'errors': ["%s :%s" % (e.strerror, filename)] }
764+
else:
765+
return {'errors': ['No draft text available for rebuilding reference relations. Need XML or plaintext.']}
760766

761767
doc.relateddocument_set.filter(relationship__slug__in=['refnorm','refinfo','refold','refunk']).delete()
762768

763769
warnings = []
764770
errors = []
765771
unfound = set()
766772
for ( ref, refType ) in refs.items():
773+
# As of Dec 2021, DocAlias has a unique constraint on the name field, so count > 1 should not occur
767774
refdoc = DocAlias.objects.filter( name=ref )
768775
count = refdoc.count()
769776
if count == 0:

ietf/doc/views_doc.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@
8080
from ietf.review.utils import can_request_review_of_doc, review_assignments_to_list_for_docs
8181
from ietf.review.utils import no_review_from_teams_on_doc
8282
from ietf.utils import markup_txt, log, markdown
83-
from ietf.utils.draft import Draft
83+
from ietf.utils.draft import PlaintextDraft
8484
from ietf.utils.response import permission_denied
8585
from ietf.utils.text import maybe_split
8686

@@ -1842,7 +1842,7 @@ def idnits2_state(request, name, rev=None):
18421842
else:
18431843
text = doc.text()
18441844
if text:
1845-
parsed_draft = Draft(text=doc.text(), source=name, name_from_source=False)
1845+
parsed_draft = PlaintextDraft(text=doc.text(), source=name, name_from_source=False)
18461846
doc.deststatus = parsed_draft.get_status()
18471847
else:
18481848
doc.deststatus="Unknown"

ietf/stats/backfill_data.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
from ietf.doc.models import Document
3131
from ietf.name.models import FormalLanguageName
32-
from ietf.utils.draft import Draft
32+
from ietf.utils.draft import PlaintextDraft
3333

3434
parser = argparse.ArgumentParser()
3535
parser.add_argument("--document", help="specific document name")
@@ -89,7 +89,7 @@ def unicode(text):
8989
with io.open(path, 'rb') as f:
9090
say("\nProcessing %s" % doc.name)
9191
sys.stdout.flush()
92-
d = Draft(unicode(f.read()), path)
92+
d = PlaintextDraft(unicode(f.read()), path)
9393

9494
updated = False
9595

ietf/submit/forms.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from ietf.submit.parsers.plain_parser import PlainParser
3939
from ietf.submit.parsers.xml_parser import XMLParser
4040
from ietf.utils import log
41-
from ietf.utils.draft import Draft
41+
from ietf.utils.draft import PlaintextDraft
4242
from ietf.utils.text import normalize_text
4343

4444
class SubmissionBaseUploadForm(forms.Form):
@@ -302,7 +302,7 @@ def cleanup(): # called when context exited, even in case of exception
302302
try:
303303
text = bytes.decode(self.file_info['txt'].charset)
304304
#
305-
self.parsed_draft = Draft(text, txt_file.name)
305+
self.parsed_draft = PlaintextDraft(text, txt_file.name)
306306
if self.filename == None:
307307
self.filename = self.parsed_draft.filename
308308
elif self.filename != self.parsed_draft.filename:

ietf/submit/tests.py

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,19 @@
1313
from io import StringIO
1414
from pyquery import PyQuery
1515

16+
from pathlib import Path
17+
1618
from django.conf import settings
19+
from django.test import override_settings
20+
from django.test.client import RequestFactory
1721
from django.urls import reverse as urlreverse
1822
from django.utils.encoding import force_str, force_text
1923

2024
import debug # pyflakes:ignore
2125

22-
from ietf.submit.utils import expirable_submissions, expire_submission
23-
from ietf.doc.factories import DocumentFactory, WgDraftFactory, IndividualDraftFactory
26+
from ietf.submit.utils import (expirable_submissions, expire_submission, find_submission_filenames,
27+
post_submission)
28+
from ietf.doc.factories import DocumentFactory, WgDraftFactory, IndividualDraftFactory, IndividualRfcFactory
2429
from ietf.doc.models import ( Document, DocAlias, DocEvent, State,
2530
BallotPositionDocEvent, DocumentAuthor, SubmissionDocEvent )
2631
from ietf.doc.utils import create_ballot_if_not_open, can_edit_docextresources, update_action_holders
@@ -40,7 +45,7 @@
4045
from ietf.utils.mail import outbox, empty_outbox, get_payload_text
4146
from ietf.utils.models import VersionInfo
4247
from ietf.utils.test_utils import login_testing_unauthorized, TestCase
43-
from ietf.utils.draft import Draft
48+
from ietf.utils.draft import PlaintextDraft
4449

4550

4651
class BaseSubmitTestCase(TestCase):
@@ -2860,10 +2865,62 @@ def test_draft_refs_identification(self):
28602865

28612866
group = None
28622867
file, __ = submission_file('draft-some-subject', '00', group, 'txt', "test_submission.txt", )
2863-
draft = Draft(file.read(), file.name)
2868+
draft = PlaintextDraft(file.read(), file.name)
28642869
refs = draft.get_refs()
28652870
self.assertEqual(refs['rfc2119'], 'norm')
28662871
self.assertEqual(refs['rfc8174'], 'norm')
28672872
self.assertEqual(refs['rfc8126'], 'info')
28682873
self.assertEqual(refs['rfc8175'], 'info')
2869-
2874+
2875+
2876+
class PostSubmissionTests(BaseSubmitTestCase):
2877+
@override_settings(RFC_FILE_TYPES=('txt', 'xml'), IDSUBMIT_FILE_TYPES=('pdf', 'md'))
2878+
def test_find_submission_filenames_rfc(self):
2879+
"""Posting an RFC submission should use RFC_FILE_TYPES"""
2880+
rfc = IndividualRfcFactory()
2881+
path = Path(self.staging_dir)
2882+
for ext in ['txt', 'xml', 'pdf', 'md']:
2883+
(path / f'{rfc.name}-{rfc.rev}.{ext}').touch()
2884+
files = find_submission_filenames(rfc)
2885+
self.assertCountEqual(
2886+
files,
2887+
{
2888+
'txt': f'{path}/{rfc.name}-{rfc.rev}.txt',
2889+
'xml': f'{path}/{rfc.name}-{rfc.rev}.xml',
2890+
# should NOT find the pdf or md
2891+
}
2892+
)
2893+
2894+
@override_settings(RFC_FILE_TYPES=('txt', 'xml'), IDSUBMIT_FILE_TYPES=('pdf', 'md'))
2895+
def test_find_submission_filenames_draft(self):
2896+
"""Posting an I-D submission should use IDSUBMIT_FILE_TYPES"""
2897+
draft = WgDraftFactory()
2898+
path = Path(self.staging_dir)
2899+
for ext in ['txt', 'xml', 'pdf', 'md']:
2900+
(path / f'{draft.name}-{draft.rev}.{ext}').touch()
2901+
files = find_submission_filenames(draft)
2902+
self.assertCountEqual(
2903+
files,
2904+
{
2905+
'pdf': f'{path}/{draft.name}-{draft.rev}.pdf',
2906+
'md': f'{path}/{draft.name}-{draft.rev}.md',
2907+
# should NOT find the txt or xml
2908+
}
2909+
)
2910+
2911+
@mock.patch('ietf.submit.utils.rebuild_reference_relations')
2912+
@mock.patch('ietf.submit.utils.find_submission_filenames')
2913+
def test_post_submission_rebuilds_ref_relations(self, mock_find_filenames, mock_rebuild_reference_relations):
2914+
"""The post_submission method should rebuild reference relations from correct files
2915+
2916+
This tests that the post_submission() utility function gets the list of files to handle from the
2917+
find_submission_filenames() method and passes them along to rebuild_reference_relations().
2918+
"""
2919+
submission = SubmissionFactory()
2920+
mock_find_filenames.return_value = {'xml': f'{self.staging_dir}/{submission.name}.xml'}
2921+
request = RequestFactory()
2922+
request.user = PersonFactory().user
2923+
post_submission(request, submission, 'doc_desc', 'subm_desc')
2924+
args, kwargs = mock_rebuild_reference_relations.call_args
2925+
self.assertEqual(args[1], mock_find_filenames.return_value)
2926+

0 commit comments

Comments
 (0)