Skip to content

Commit f9136dc

Browse files
committed
Changed the photo collection script to use a more sophisticated algorithm for separating name parts, in order to avoid photo names like 'dr-foo-bar-ph-d' and to correctly identify surnames like 'le-faucheur'. Added a translation table for IAB photos that are named by first name only. Added additional directories for IAB photos.
- Legacy-Id: 11271
1 parent 33a0629 commit f9136dc

1 file changed

Lines changed: 141 additions & 70 deletions

File tree

ietf/bin/2016-05-25-collect-photos

Lines changed: 141 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env python
22

3-
import os, sys, shutil, pathlib
3+
import os, re, sys, shutil, pathlib
44
from collections import namedtuple
55
from PIL import Image
66

@@ -13,12 +13,12 @@ import django
1313
django.setup()
1414

1515
from django.conf import settings
16+
from django.utils.text import slugify
17+
1618
import debug
1719

1820
from ietf.group.models import Role, Person
19-
20-
21-
21+
from ietf.person.name import name_parts
2222

2323
old_images_dir = ''
2424
new_images_dir = settings.PHOTOS_DIR
@@ -42,41 +42,87 @@ for f in old_image_files:
4242
img = Image.open(path)
4343
old_images.append(photo(path, f.stem.decode('utf8'), f.suffix, img.size[0], img.size[1], f.stat().st_mtime, f))
4444

45+
# Fix up some names:
46+
47+
def fix_missing_surnames(images):
    """Rewrite the ``name`` of every entry in ``images``, in place.

    For each entry, every ``-<digits>x<digits>`` fragment is removed from
    its name.  If the entry's path contains ``/iab/`` and the cleaned name
    is a key of the lookup table below, the name is replaced by the
    table's value (a ``first-last`` style name).  Every entry is then
    rebuilt with ``photo(...)`` and stored back at the same index, so the
    list is mutated and nothing is returned.
    """
    # Names used for IAB photos that lack a surname (or are otherwise
    # irregular), mapped to the hyphenated full name to use instead.
    full_names = {
        "alissa": "alissa-cooper",
        "alissa1": "alissa-cooper",
        "andrei": "andrei-robachevsky",
        "bernard": "bernard-aboba",
        "danny": "danny-mcpherson",
        "danny1": "danny-mcpherson",
        "dthaler": "dave-thaler",
        "eliot-mug": "eliot-lear",
        "erik.nordmark-300": "erik-nordmark",
        "hannes": "hannes-tschofenig",
        "hildebrand": "joe-hildebrand",
        "housley": "russ-housley",
        "jariarkko": "jari-arkko",
        "joel": "joel-jaeggli",
        "joel1": "joel-jaeggli",
        "joel2": "joel-jaeggli",
        "jon": "jon-peterson",
        "kessens": "david-kessens",
        "klensin": "john-klensin",
        "lars": "lars-eggert",
        "lars1": "lars-eggert",
        "marc_blanchet": "marc-blanchet",
        "marcelo": "marcelo-bagnulo",
        "olaf": "olaf-kolkman",
        "olaf1": "olaf-kolkman",
        "ross": "ross-callon",
        "spencer": "spencer-dawkins",
        "spencer1": "spencer-dawkins",
        "vijay": "vijay-gurbani",
        "xing": "xing-li",
    }

    for index, entry in enumerate(images):
        # Drop any "-<width>x<height>"-shaped fragment before the lookup.
        cleaned = re.sub('-[0-9]+x[0-9]+', '', entry.name)
        # The table is only consulted for files under an "/iab/" path.
        is_known_iab_name = '/iab/' in entry.path and cleaned in full_names
        if is_known_iab_name:
            cleaned = full_names[cleaned]
        # Every entry is rebuilt, so the size fragment is stripped from
        # all names, not just the ones found in the table.
        images[index] = photo(
            entry.path,
            cleaned,
            entry.ext,
            entry.width,
            entry.height,
            entry.time,
            entry.file,
        )
87+
88+
89+
fix_missing_surnames(old_images)
90+
4591
interesting_persons = set(Person.objects.all())
4692

4793
name_alias = {
48-
"andy": ["andrew", ],
49-
"ben": ["benjamin", ],
50-
"bill": ["william", ],
51-
"bob": ["robert", ],
52-
"chris": ["christopher", "christian"],
53-
"dan": ["daniel", ],
54-
"dave": ["david", ],
55-
"dick": ["richard", ],
56-
"fred": ["alfred", ],
57-
"geoff": ["geoffrey", ],
58-
"jake": ["jacob", ],
59-
"jerry": ["gerald", ],
60-
"jim": ["james", ],
61-
"joe": ["joseph", ],
62-
"jon": ["jonathan", ],
63-
"mike": ["michael", ],
64-
"ned": ["edward", ],
65-
"pete": ["peter", ],
66-
"ron": ["ronald", ],
67-
"russ": ["russel", ],
68-
"steve": ["stephen", ],
69-
"ted": ["edward", ],
70-
"terry": ["terence", ],
71-
"tom": ["thomas", ],
72-
"wes": ["wesley", ],
73-
"will": ["william", ],
74-
75-
"beth": ["elizabeth", ],
76-
"liz": ["elizabeth", ],
77-
"lynn": ["carolyn", ],
78-
"pat": ["patricia", "patrick", ],
79-
"sue": ["susan", ],
94+
u"andy": [u"andrew", ],
95+
u"ben": [u"benjamin", ],
96+
u"bill": [u"william", ],
97+
u"bob": [u"robert", ],
98+
u"chris": [u"christopher", u"christian"],
99+
u"dan": [u"daniel", ],
100+
u"dave": [u"david", ],
101+
u"dick": [u"richard", ],
102+
u"fred": [u"alfred", ],
103+
u"geoff": [u"geoffrey", ],
104+
u"jake": [u"jacob", ],
105+
u"jerry": [u"gerald", ],
106+
u"jim": [u"james", ],
107+
u"joe": [u"joseph", ],
108+
u"jon": [u"jonathan", ],
109+
u"mike": [u"michael", ],
110+
u"ned": [u"edward", ],
111+
u"pete": [u"peter", ],
112+
u"ron": [u"ronald", ],
113+
u"russ": [u"russel", ],
114+
u"steve": [u"stephen", ],
115+
u"ted": [u"edward", ],
116+
u"terry": [u"terence", ],
117+
u"tom": [u"thomas", ],
118+
u"wes": [u"wesley", ],
119+
u"will": [u"william", ],
120+
121+
u"beth": [u"elizabeth", ],
122+
u"liz": [u"elizabeth", ],
123+
u"lynn": [u"carolyn", ],
124+
u"pat": [u"patricia", u"patrick", ],
125+
u"sue": [u"susan", ],
80126
}
81127
# Add lookups from long to short, from the initial set
82128
for key,value in name_alias.items():
@@ -119,37 +165,53 @@ for person in sorted(list(interesting_persons),key=lambda x:x.last_name()+x.asci
119165
break
120166
if not person.ascii.strip():
121167
print(" Setting person.ascii for %s" % person.name)
122-
person.ascii = person.name.encode('ascii', errors='replace')
123-
debug.show('person.ascii')
124-
name_parts = person.ascii.lower().split()
168+
person.ascii = person.name.encode('ascii', errors='replace').decode('ascii')
169+
170+
_, first, _, last, _ = person.ascii_parts()
171+
first = first.lower()
172+
last = last. lower()
125173
if not substr_pattern:
126-
substr_pattern = u'-'.join(name_parts[-1:]+name_parts[0:1])
174+
substr_pattern = slugify("%s %s" % (last, first))
175+
176+
if first in ['', '<>'] or last in ['', '<>']:
177+
continue
178+
179+
#debug.show('1, substr_pattern')
127180

128181
candidates = [x for x in old_images if x.name.lower().startswith(substr_pattern)]
129182
# Also check the reversed name order (necessary for Deng Hui, for instance)
130-
substr_pattern = u'-'.join(name_parts[0:1]+name_parts[-1:])
183+
substr_pattern = slugify("%s %s" % (first, last))
184+
#debug.show('2, substr_pattern')
185+
prev_len = len(candidates)
131186
candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
132-
if candidates:
133-
print(" Used '%s %s' instead of '%s %s'" % (name_parts[-1], name_parts[0], name_parts[0], name_parts[-1], ))
187+
if prev_len < len(candidates) :
188+
print(" Found match with '%s %s' for '%s %s'" % (last, first, first, last, ))
134189
# If no joy, try a short name
135-
if name_parts[0] in name_alias:
136-
for alias in name_alias[name_parts[0]]:
137-
substr_pattern = u'-'.join(name_parts[-1:]+[alias])
190+
if first in name_alias:
191+
prev_len = len(candidates)
192+
for alias in name_alias[first]:
193+
substr_pattern = slugify("%s %s" % (last, alias))
194+
#debug.show('3, substr_pattern')
138195
candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
139-
if candidates:
140-
print(" Used '%s %s' instead of '%s %s'" % (alias, name_parts[-1], name_parts[0], name_parts[-1], ))
141-
# If still no joy, try with Person.plain_name() (necessary for Donald Eastlake)
142-
if not candidates:
143-
name_parts = person.plain_name().lower().split()
144-
substr_pattern = u'-'.join(name_parts[-1:]+name_parts[0:1])
145-
candidates = [x for x in old_images if x.name.lower().startswith(substr_pattern)]
146-
# If no joy, try a short name
147-
if not candidates and name_parts[0] in name_alias:
148-
for alias in name_alias[name_parts[0]]:
149-
substr_pattern = u'-'.join(name_parts[-1:]+[alias])
150-
candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
151-
if candidates:
152-
print(" Used '%s %s' instead of '%s %s'" % (alias, name_parts[-1], name_parts[0], name_parts[-1], ))
196+
if prev_len < len(candidates):
197+
print(" Found match with '%s %s' for '%s %s'" % (alias, last, first, last, ))
198+
199+
200+
# # If still no joy, try with Person.plain_name() (necessary for Donald Eastlake)
201+
# if not candidates:
202+
# prefix, first, middle, last, suffix = person.name_parts()
203+
# name_parts = person.plain_name().lower().split()
204+
#
205+
# substr_pattern = u'-'.join(name_parts[-1:]+name_parts[0:1])
206+
# candidates = [x for x in old_images if x.name.lower().startswith(substr_pattern)]
207+
# # If no joy, try a short name
208+
# if not candidates and first in name_alias:
209+
# prev_len = len(candidates)
210+
# for alias in name_alias[first]:
211+
# substr_pattern = u'-'.join(name_parts[-1:]+[alias])
212+
# candidates += [x for x in old_images if x.name.lower().startswith(substr_pattern)]
213+
# if prev_len < len(candidates) :
214+
# print(" Used '%s %s' instead of '%s %s'" % (alias, last, first, last, ))
153215

154216
# # Fixup for other exceptional cases
155217
# if person.ascii=="David Oran":
@@ -172,15 +234,24 @@ for person in sorted(list(interesting_persons),key=lambda x:x.last_name()+x.asci
172234
# - if none found, then the smallest photo
173235
if candidates:
174236
candidates.sort(key=lambda x: "%04d-%d" % (x.width, x.time))
175-
full = candidates[-1]
176-
thumbs = [ c for c in candidates if c.width==c.height and c.width <= 200 ]
177-
if not thumbs:
178-
thumbs = [ c for c in candidates if c.width==c.height ]
179-
if not thumbs:
180-
thumbs = [ c for c in candidates if c.width <= 200 ]
181-
if not thumbs:
182-
thumbs = candidates[:1]
183-
thumb = thumbs[-1]
237+
iesg_cand = [ c for c in candidates if '/iesg/' in c.path ]
238+
iab_cand = [ c for c in candidates if '/iab/' in c.path ]
239+
if iesg_cand:
240+
full = iesg_cand[-1]
241+
thumb = iesg_cand[-1]
242+
elif iab_cand:
243+
full = iab_cand[-1]
244+
thumb = iab_cand[0]
245+
else:
246+
full = candidates[-1]
247+
thumbs = [ c for c in candidates if c.width==c.height and c.width <= 200 ]
248+
if not thumbs:
249+
thumbs = [ c for c in candidates if c.width==c.height ]
250+
if not thumbs:
251+
thumbs = [ c for c in candidates if c.width <= 200 ]
252+
if not thumbs:
253+
thumbs = candidates[:1]
254+
thumb = thumbs[-1]
184255
candidates = [ thumb, full ]
185256

186257
# At this point we either have no candidates or two. If two, the first will be the thumb
@@ -203,6 +274,7 @@ for person in sorted(list(interesting_persons),key=lambda x:x.last_name()+x.asci
203274
#
204275
copy( thumb.path, os.path.join(new_images_dir,new_thumb_name) )
205276

277+
206278
print("")
207279
not_processed = 0
208280
for file in old_image_files:
@@ -217,5 +289,4 @@ for file in old_image_files:
217289
not_processed += 1
218290
print(u"Not processed: "+str(file).decode('utf8'))
219291
print("")
220-
print("")
221292
print("Not processed: %s files" % not_processed)

0 commit comments

Comments
 (0)