Skip to content

Commit 33b275b

Browse files
committed
Added ietf.utils.text.unidecode_name() and replaced various uses of unidecode() with it, in order to normalize the generation of ascii versions of names, to avoid different practices in space stripping and space normalization in different parts of the code.
- Legacy-Id: 14128
1 parent 6aa2cfc commit 33b275b

8 files changed

Lines changed: 30 additions & 19 deletions

File tree

ietf/nomcom/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
from ietf.person.models import Email, Person
2121
from ietf.mailtrigger.utils import gather_address_lists
2222
from ietf.utils.pipe import pipe
23-
from unidecode import unidecode
2423
from ietf.utils.mail import send_mail_text, send_mail
2524
from ietf.utils.log import log
25+
from ietf.utils.text import unidecode_name
2626

2727
import debug # pyflakes:ignore
2828

@@ -365,7 +365,7 @@ def make_nomineeposition_for_newperson(nomcom, candidate_name, candidate_email,
365365
# This is expected to fail if called with an existing email address
366366
email = Email.objects.create(address=candidate_email)
367367
person = Person.objects.create(name=candidate_name,
368-
ascii=unidecode(candidate_name),
368+
ascii=unidecode_name(candidate_name),
369369
address=candidate_email)
370370
email.person = person
371371
email.save()

ietf/person/factories.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
import debug # pyflakes:ignore
1414

1515
from ietf.person.models import Person, Alias, Email
16+
from ietf.utils.text import unidecode_name
17+
1618

1719
fake = faker.Factory.create()
1820

@@ -39,7 +41,7 @@ class Meta:
3941

4042
user = factory.SubFactory(UserFactory)
4143
name = factory.LazyAttribute(lambda p: u'%s %s'%(p.user.first_name,p.user.last_name))
42-
ascii = factory.LazyAttribute(lambda p: unicode(unidecode(p.name).strip()))
44+
ascii = factory.LazyAttribute(lambda p: unicode(unidecode_name(p.name)))
4345

4446
class Params:
4547
with_bio = factory.Trait(biography = u"\n\n".join(fake.paragraphs()))

ietf/person/models.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import email.utils
55
import email.header
66
from hashids import Hashids
7-
from unidecode import unidecode
87
from urlparse import urljoin
98

109
from django.conf import settings
@@ -21,6 +20,7 @@
2120
from ietf.utils.mail import send_mail_preformatted
2221
from ietf.utils.storage import NoLocationMigrationFileSystemStorage
2322
from ietf.utils.mail import formataddr
23+
from ietf.utils.text import unidecode_name
2424

2525

2626
class PersonInfo(models.Model):
@@ -61,18 +61,18 @@ def ascii_name(self):
6161
# we're validating the content of the ascii field, and have
6262
# verified that the field is ascii clean in the database:
6363
if not all(ord(c) < 128 for c in self.ascii):
64-
self._cached_ascii_name = unidecode(self.ascii).strip()
64+
self._cached_ascii_name = unidecode_name(self.ascii)
6565
else:
6666
self._cached_ascii_name = self.ascii
6767
else:
68-
self._cached_ascii_name = unidecode(self.plain_name()).strip()
68+
self._cached_ascii_name = unidecode_name(self.plain_name())
6969
return self._cached_ascii_name
7070
def plain_ascii(self):
7171
if not hasattr(self, '_cached_plain_ascii'):
7272
if self.ascii:
73-
ascii = unidecode(self.ascii).strip()
73+
ascii = unidecode_name(self.ascii)
7474
else:
75-
ascii = unidecode(self.name).strip()
75+
ascii = unidecode_name(self.name)
7676
prefix, first, middle, last, suffix = name_parts(ascii)
7777
self._cached_plain_ascii = u" ".join([first, last])
7878
return self._cached_plain_ascii

ietf/review/import_from_review_tool.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python
22

33
import sys, os
4+
import argparse
45

56
# boilerplate
67
basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
@@ -24,8 +25,7 @@
2425
from ietf.doc.models import Document, DocAlias, ReviewRequestDocEvent, NewRevisionDocEvent, DocTypeName, State
2526
from ietf.utils.text import strip_prefix, xslugify
2627
from ietf.review.utils import possibly_advance_next_reviewer_for_team
27-
import argparse
28-
from unidecode import unidecode
28+
from ietf.utils.text import unidecode_name
2929

3030
parser = argparse.ArgumentParser()
3131
parser.add_argument("database", help="database must be included in settings")
@@ -92,7 +92,7 @@ def parse_timestamp(t):
9292
if not email:
9393
person = Person.objects.filter(alias__name=row.name).first()
9494
if not person:
95-
person, created = Person.objects.get_or_create(name=row.name, ascii=unidecode(row.name))
95+
person, created = Person.objects.get_or_create(name=row.name, ascii=unidecode_name(row.name))
9696
if created:
9797
print "created person", unicode(person).encode("utf-8")
9898
existing_aliases = set(Alias.objects.filter(person=person).values_list("name", flat=True))

ietf/stats/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
from collections import defaultdict
44

55
from django.conf import settings
6+
from django.contrib.auth.models import User
67

78
from ietf.stats.models import AffiliationAlias, AffiliationIgnoredEnding, CountryAlias, MeetingRegistration
89
from ietf.name.models import CountryName
910
from ietf.person.models import Person, Email, Alias
10-
from django.contrib.auth.models import User
11-
from unidecode import unidecode
11+
from ietf.utils.text import unidecode_name
1212

1313

1414
def compile_affiliation_ending_stripping_regexp():
@@ -269,7 +269,7 @@ def get_meeting_registration_data(meeting):
269269
last_name = last_name.capitalize()
270270
regname = "%s %s" % (first_name, last_name)
271271
# if there are any unicode characters decode the string to ascii
272-
ascii_name = unidecode(regname).strip()
272+
ascii_name = unidecode_name(regname)
273273

274274
# Create a new user object if it does not exist already
275275
# if the user already exists do not try to create a new one

ietf/submit/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import datetime
55
import six # pyflakes:ignore
66
import xml2rfc
7-
from unidecode import unidecode
87

98
from django.conf import settings
109
from django.core.validators import validate_email, ValidationError
@@ -31,6 +30,7 @@
3130
from ietf.utils.accesstoken import generate_random_key
3231
from ietf.utils.draft import Draft
3332
from ietf.utils.mail import is_valid_email
33+
from ietf.utils.text import unidecode_name
3434

3535

3636
def validate_submission(submission):
@@ -407,7 +407,7 @@ def ensure_person_email_info_exists(name, email):
407407
person = Person()
408408
person.name = name
409409
log.assertion('isinstance(person.name, six.text_type)')
410-
person.ascii = unidecode(person.name).decode('ascii')
410+
person.ascii = unidecode_name(person.name).decode('ascii')
411411
person.save()
412412

413413
# make sure we have an email address

ietf/utils/test_data.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from __future__ import unicode_literals
55

66
import datetime
7-
from unidecode import unidecode
87

98
from django.conf import settings
109
from django.contrib.auth.models import User
@@ -21,6 +20,8 @@
2120
from ietf.person.models import Person, Email
2221
from ietf.group.utils import setup_default_community_list_for_group
2322
from ietf.review.models import (ReviewRequest, ReviewerSettings, ReviewResultName, ReviewTypeName, ReviewTeamSettings )
23+
from ietf.utils.text import unidecode_name
24+
2425

2526
def create_person(group, role_name, name=None, username=None, email_address=None, password=None, is_staff=False, is_superuser=False):
2627
"""Add person/user/email and role."""
@@ -36,7 +37,7 @@ def create_person(group, role_name, name=None, username=None, email_address=None
3637
user = User.objects.create(username=username,is_staff=is_staff,is_superuser=is_superuser)
3738
user.set_password(password)
3839
user.save()
39-
person = Person.objects.create(name=name, ascii=unidecode(smart_text(name)), user=user)
40+
person = Person.objects.create(name=name, ascii=unidecode_name(smart_text(name)), user=user)
4041
email = Email.objects.create(address=email_address, person=person)
4142
Role.objects.create(group=group, name_id=role_name, person=person, email=email)
4243
return person

ietf/utils/text.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from __future__ import unicode_literals
22

33
import re
4-
import unicodedata
54
import textwrap
65
import types
6+
import unicodedata
7+
import unidecode
78

89
from django.utils.functional import allow_lazy
910
from django.utils import six
@@ -125,3 +126,10 @@ def isascii(text):
125126
except UnicodeEncodeError:
126127
return False
127128

129+
def unidecode_name(name):
    """
    Return the unidecode() transliteration of ``name`` with normalized spacing.

    unidecode() of cjk ideograms can produce strings which contain spaces.
    Strip leading and trailing whitespace, and collapse every internal run
    of whitespace to a single space.
    """
    # split() with no arguments discards leading/trailing whitespace and
    # treats any run of whitespace as one separator, so runs of three or
    # more spaces are collapsed as well -- a single replace() pass of two
    # spaces to one would leave a double space behind in that case.
    return " ".join(unidecode.unidecode(name).split())
135+

0 commit comments

Comments
 (0)