Skip to content

Commit 2c1438c

Browse files
committed
Moved unidecode_name from utils.text to person.name.
Modified UserFactory to pick a new locale for each new user, instead of using the same locale for a whole test run. This (almost) ensures that the code which deals with non-ASCII names is exercised, something which would not happen if a locale with ASCII-only names was chosen at the start of a run. Modified name.initials() so that non-word characters are not used as initials. Modified unidecode_name() to do more normalization, in order to conform to the conventions used in Internet-Drafts. Added saving of the factory-boy random state, so that a test suite can be re-run with the same pseudo-random sequence as in a previous failed run. Fixed an issue with email formatting in test_api_submit_ok(). Modified the draft author extraction code to deal better with names containing embedded apostrophes. - Legacy-Id: 14141
1 parent a440a30 commit 2c1438c

13 files changed

Lines changed: 89 additions & 27 deletions

File tree

ietf/nomcom/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from ietf.utils.pipe import pipe
2323
from ietf.utils.mail import send_mail_text, send_mail
2424
from ietf.utils.log import log
25-
from ietf.utils.text import unidecode_name
25+
from ietf.person.name import unidecode_name
2626

2727
import debug # pyflakes:ignore
2828

ietf/person/factories.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,23 @@
1313
import debug # pyflakes:ignore
1414

1515
from ietf.person.models import Person, Alias, Email
16-
from ietf.utils.text import unidecode_name
16+
from ietf.person.name import unidecode_name
1717

1818

1919
fake = faker.Factory.create()
2020

21+
def random_faker():
22+
return faker.Faker(random.sample(faker.config.AVAILABLE_LOCALES, 1)[0])
23+
2124
class UserFactory(factory.DjangoModelFactory):
2225
class Meta:
2326
model = User
2427
django_get_or_create = ('username',)
25-
exclude = ['locale', ]
28+
exclude = ['faker', ]
2629

27-
locale = random.sample(faker.config.AVAILABLE_LOCALES, 1)[0]
28-
first_name = factory.Faker('first_name', locale)
29-
last_name = factory.Faker('last_name', locale)
30+
faker = factory.LazyFunction(random_faker)
31+
first_name = factory.LazyAttribute(lambda o: o.faker.first_name())
32+
last_name = factory.LazyAttribute(lambda o: o.faker.last_name())
3033
email = factory.LazyAttributeSequence(lambda u, n: '%s.%s_%d@%s'%( slugify(unidecode(u.first_name)),
3134
slugify(unidecode(u.last_name)), n, fake.domain_name()))
3235
username = factory.LazyAttribute(lambda u: u.email)

ietf/person/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from ietf.utils.mail import send_mail_preformatted
2121
from ietf.utils.storage import NoLocationMigrationFileSystemStorage
2222
from ietf.utils.mail import formataddr
23-
from ietf.utils.text import unidecode_name
23+
from ietf.person.name import unidecode_name
2424

2525

2626
class PersonInfo(models.Model):

ietf/person/name.py

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
import re
2+
import unidecode
23

34
import debug # pyflakes:ignore
45

6+
7+
def name_particle_match(name):
8+
return re.search(r" (af|al|Al|de|der|di|Di|du|el|El|Hadi|in 't|Le|st\.?|St\.?|ten|ter|van|van der|Van|von|von der|Von|zu) ", name)
9+
510
def name_parts(name):
611
prefix, first, middle, last, suffix = u"", u"", u"", u"", u""
712

@@ -36,7 +41,7 @@ def name_parts(name):
3641
full = full.lower() # adjust case for all-uppercase input
3742
# This is an incomplete list. Adjust as needed to handle known ietf
3843
# participant names correctly:
39-
particle = re.search(r" (af|al|Al|de|der|di|Di|du|el|El|Hadi|in 't|Le|st\.?|St\.?|ten|ter|van|van der|Van|von|von der|Von|zu) ", full)
44+
particle = name_particle_match(full)
4045
if particle:
4146
pos = particle.start()
4247
parts = full[:pos].split() + [full[pos+1:]]
@@ -52,19 +57,63 @@ def name_parts(name):
5257
else:
5358
last = parts[0]
5459
return prefix, first, middle, last, suffix
55-
60+
5661
def initials(name):
5762
prefix, first, middle, last, suffix = name_parts(name)
5863
given = first
5964
if middle:
6065
given += u" "+middle
61-
initials = u" ".join([ n[0]+'.' for n in given.split() ])
66+
# Don't use non-word characters as initials.
67+
# Example: The Bulgarian transcribed name "'Rnest Balkanska" should not have an initial of "'".
68+
given = re.sub('[^ .\w]', '', given)
69+
initials = u" ".join([ n[0].upper()+'.' for n in given.split() ])
6270
return initials
6371

6472
def plain_name(name):
6573
prefix, first, middle, last, suffix = name_parts(name)
6674
return u" ".join([first, last])
6775

76+
def capfirst(s):
77+
# Capitalize the first word character, skipping non-word characters and
78+
# leaving following word characters untouched:
79+
letters = list(s)
80+
for i,l in enumerate(letters):
81+
if l.isalpha():
82+
letters[i] = l.capitalize()
83+
break
84+
return ''.join(letters)
85+
86+
def unidecode_name(uname):
87+
"""
88+
unidecode() of cjk ideograms can produce strings which contain spaces.
89+
Strip leading and trailing spaces, and reduce double-spaces to single.
90+
91+
For some other ranges, unidecode returns all-lowercase names; fix these
92+
up with capitalization.
93+
"""
94+
# Fix double spacing
95+
name = unidecode.unidecode(uname)
96+
if name == uname:
97+
return name
98+
name = name.strip().replace(' ', ' ')
99+
# Fix all-upper and all-lower names:
100+
# Check for name particles -- don't capitalize those
101+
m = name_particle_match(name)
102+
particle = m.group(1) if m else None
103+
# Get the name parts
104+
prefix, first, middle, last, suffix = name_parts(name)
105+
# Capitalize names
106+
first = capfirst(first)
107+
middle = ' '.join([ capfirst(p) for p in middle.split() ])
108+
last = ' '.join([ capfirst(p) for p in last.split() ])
109+
# Restore the particle, if any
110+
if particle and last.startswith(capfirst(particle)+' '):
111+
last = ' '.join([ particle, last[len(particle)+1:] ])
112+
# Recombine the parts
113+
parts = prefix, first, middle, last, suffix
114+
name = ' '.join([ p for p in parts if p and p.strip() != '' ])
115+
return name
116+
68117
if __name__ == "__main__":
69118
import sys
70119
name = u" ".join(sys.argv[1:])

ietf/review/import_from_review_tool.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from ietf.doc.models import Document, DocAlias, ReviewRequestDocEvent, NewRevisionDocEvent, DocTypeName, State
2626
from ietf.utils.text import strip_prefix, xslugify
2727
from ietf.review.utils import possibly_advance_next_reviewer_for_team
28-
from ietf.utils.text import unidecode_name
28+
from ietf.person.name import unidecode_name
2929

3030
parser = argparse.ArgumentParser()
3131
parser.add_argument("database", help="database must be included in settings")

ietf/settings.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -920,6 +920,9 @@ def skip_unreadable_post(record):
920920

921921
STATS_NAMES_LIMIT = 25
922922

923+
UTILS_TEST_RANDOM_STATE_FILE = '.factoryboy_random_state'
924+
925+
923926
# Put the production SECRET_KEY in settings_local.py, and also any other
924927
# sensitive or site-specific changes. DO NOT commit settings_local.py to svn.
925928
from settings_local import * # pyflakes:ignore pylint: disable=wildcard-import

ietf/stats/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias, MeetingRegistration
99
from ietf.name.models import CountryName
1010
from ietf.person.models import Person, Email, Alias
11-
from ietf.utils.text import unidecode_name
11+
from ietf.person.name import unidecode_name
1212

1313

1414
def compile_affiliation_ending_stripping_regexp():

ietf/submit/tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1588,7 +1588,7 @@ def test_api_submit_bad_method(self):
15881588

15891589
def test_api_submit_ok(self):
15901590
r, author, name = self.post_submission('00')
1591-
expected = "Upload of %s OK, confirmation requests sent to:\n %s" % (name, author.formatted_email())
1591+
expected = "Upload of %s OK, confirmation requests sent to:\n %s" % (name, author.formatted_email().replace('\n',''))
15921592
self.assertContains(r, expected, status_code=200)
15931593

15941594
def test_api_submit_no_user(self):

ietf/submit/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from ietf.utils.accesstoken import generate_random_key
3131
from ietf.utils.draft import Draft
3232
from ietf.utils.mail import is_valid_email
33-
from ietf.utils.text import unidecode_name
33+
from ietf.person.name import unidecode_name
3434

3535

3636
def validate_submission(submission):

ietf/utils/draft.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -509,8 +509,8 @@ def extract_authors(self):
509509
"honor" : r"(?:[A-Z]\.|Dr\.?|Dr\.-Ing\.|Prof(?:\.?|essor)|Sir|Lady|Dame|Sri)",
510510
"prefix": r"([Dd]e|Hadi|van|van de|van der|Ver|von|[Ee]l)",
511511
"suffix": r"(jr.?|Jr.?|II|2nd|III|3rd|IV|4th)",
512-
"first" : r"([A-Z][-A-Za-z]*)(( ?\([A-Z][-A-Za-z]*\))?(\.?[- ]{1,2}[A-Za-z]+)*)",
513-
"last" : r"([-A-Za-z']{2,})",
512+
"first" : r"([A-Z][-A-Za-z'`]*)(( ?\([A-Z][-A-Za-z'`]*\))?(\.?[- ]{1,2}[A-Za-z'`]+)*)",
513+
"last" : r"([-A-Za-z'`]{2,})",
514514
"months": r"(January|February|March|April|May|June|July|August|September|October|November|December)",
515515
"mabbr" : r"(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\.?",
516516
}
@@ -575,7 +575,7 @@ def dotexp(s):
575575

576576
# permit insertion of middle names between first and last, and
577577
# add possible honorific and suffix information
578-
authpat = r"(?:^| and )(?:%(hon)s ?)?(%(first)s\S*( +[^ ]+)* +%(last)s)( *\(.*|,( [A-Z][-A-Za-z0-9]*)?| %(suffix)s| [A-Z][a-z]+)?" % {"hon":hon, "first":first, "last":last, "suffix":suffix,}
578+
authpat = r"(?:^| and )(?:%(hon)s ?)?([`']?%(first)s\S*( +[^ ]+)* +%(last)s)( *\(.*|,( [A-Z][-A-Za-z0-9]*)?| %(suffix)s| [A-Z][a-z]+)?" % {"hon":hon, "first":first, "last":last, "suffix":suffix,}
579579
return authpat
580580

581581
authors = []

0 commit comments

Comments
 (0)