Skip to content

Commit b85e1c4

Browse files
committed
Added a delete_data_lacking_consent management command that deletes person records and person information for which we need consent according to GDPR, but have not received it.
- Legacy-Id: 15461
1 parent 3f9dbea commit b85e1c4

1 file changed

Lines changed: 180 additions & 0 deletions

File tree

Lines changed: 180 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,180 @@
1+
# Copyright The IETF Trust 2016, All Rights Reserved
2+
# -*- coding: utf-8 -*-
3+
from __future__ import unicode_literals, print_function
4+
5+
import datetime
6+
import sys
7+
import time
8+
from tqdm import tqdm
9+
10+
from django.conf import settings
11+
from django.contrib.admin.utils import NestedObjects
12+
from django.contrib.auth.models import User
13+
from django.core.exceptions import ObjectDoesNotExist
14+
from django.core.management.base import BaseCommand, CommandError
15+
from django.db.models import F
16+
17+
import debug # pyflakes:ignore
18+
19+
from ietf.community.models import SearchRule
20+
from ietf.nomcom.models import Feedback, Nomination
21+
from ietf.person.models import Person, Alias, PersonEvent, PersonalApiKey, Email
22+
from ietf.person.name import unidecode_name
23+
from ietf.stats.models import MeetingRegistration
24+
from ietf.utils.mail import send_mail
25+
from ietf.utils.log import log
26+
27+
class Command(BaseCommand):
    """Remove personal data that is stored without consent.

    The command runs four passes, in this order:

    1. Users that have no person record are either connected to the person
       owning an email record equal to their username, or deleted.
    2. Persons lacking consent that are not referenced by any of the
       relations listed in the second pass are deleted, together with their
       historical records.
    3. For the remaining persons lacking consent, the name is replaced by the
       name from a draft (when there is one), the biography and community
       lists are removed, and email addresses whose origin is not one of
       author/role/reviewer/liaison/shepherd are deleted.
    4. Email records with an empty origin belonging to persons with
       consent=False are deleted.

    With --dry-run nothing is modified; the command only reports what it
    would do.
    """

    help = (u"""

        Delete data for which consent to store the data has not been given,
        where the data does not fall under the GDPR Legitimate Interest clause
        for the IETF. This includes full name, ascii name, bio, login,
        notification subscriptions and email addresses that are not derived from
        published drafts or ietf roles.

        """)

    def add_arguments(self, parser):
        """Register the command line options of this command on *parser*."""
        parser.add_argument('-n', '--dry-run', action='store_true', default=False,
            help="Don't delete anything, just list what would be done.")
#         parser.add_argument('-d', '--date', help="Date of deletion (mentioned in message)")
        parser.add_argument('-m', '--minimum-response-time', metavar='TIME', type=int, default=14,
            help="Minimum response time, default: %(default)s days.  Persons to whom a "
                 "consent request email has been sent more recently than this will not "
                 "be affected by the run.")
#         parser.add_argument('-r', '--rate', type=float, default=1.0,
#             help='Rate of sending mail, default: %(default)s/s')
#         parser.add_argument('user', nargs='*')

    def handle(self, *args, **options):
        """Run the four clean-up passes.

        Reads the 'dry_run', 'verbosity' and 'minimum_response_time' options.
        """
        dry_run = options['dry_run']
        verbosity = int(options['verbosity'])
        settings.DEBUG = False              # don't log to console

        self._reconcile_users_without_person(dry_run)
        self._delete_persons_without_interest(dry_run, verbosity, options['minimum_response_time'])
        self._strip_unconsented_person_info(dry_run)
        self._delete_unconsented_emails_without_origin(dry_run)

    def _reconcile_users_without_person(self, dry_run):
        """Connect or delete users that have no associated person record."""
        users = User.objects.filter(person__isnull=True, username__contains='@')
        self.stdout.write("Found %d users without associated person records" % (users.count(), ))
        emails = Email.objects.filter(address__in=users.values_list('username', flat=True))
        # fix up users that don't have person records, but have a username matching a known email record
        self.stdout.write("Checking usernames against email records ...")
        for email in tqdm(emails):
            user = users.get(username=email.address)
            if email.person.user_id:
                # The person owning the matching email already has another
                # user, so this user is redundant.
                if dry_run:
                    self.stdout.write("Would delete user #%-6s (%s) %s" % (user.id, user.last_login, user.username))
                else:
                    log("Deleting user #%-6s (%s) %s: no person record, matching email has other user" % (user.id, user.last_login, user.username))
                    # Remember the id: it is needed for the history clean-up
                    # after the user object itself has been deleted.
                    user_id = user.id
                    user.delete()
                    Person.history.filter(user_id=user_id).delete()
                    Email.history.filter(history_user=user_id).delete()
            else:
                if dry_run:
                    self.stdout.write("Would connect user #%-6s %s to person #%-6s %s" % (user.id, user.username, email.person.id, email.person.ascii_name()))
                else:
                    log("Connecting user #%-6s %s to person #%-6s %s" % (user.id, user.username, email.person.id, email.person.ascii_name()))
                    email.person.user_id = user.id
                    email.person.save()

        # delete users without person records
        users = users.exclude(username__in=emails.values_list('address', flat=True))
        orphan_count = users.count()
        if dry_run:
            self.stdout.write("Would delete %d users without associated person records" % (orphan_count, ))
        elif orphan_count:
            log("Deleting %d users without associated person records" % (orphan_count, ))
            assert not users.filter(person__isnull=False).exists()
            # Materialize the ids *before* deleting.  A lazy values_list()
            # would be evaluated after the delete, yield nothing, and make
            # the check below pass unconditionally.
            user_ids = list(users.values_list('id', flat=True))
            users.delete()
            assert not Person.history.filter(user_id__in=user_ids).exists()

    def _delete_persons_without_interest(self, dry_run, verbosity, minimum_response_time):
        """Delete persons lacking consent that no excluded relation refers to.

        Persons with a 'gdpr_notice_email' person event newer than
        *minimum_response_time* days are left alone.  When *verbosity* is
        above 1, persons with unexpected related records are reported on
        stderr before they are deleted.
        """
        event_type = 'gdpr_notice_email'
        self.stdout.write('Querying the database for person records without given consent ...')
        notification_cutoff = datetime.datetime.now() - datetime.timedelta(days=minimum_response_time)
        persons = Person.objects.exclude(consent=True)
        persons = persons.exclude(id=1)         # make sure we don't delete System ;-)
        self.stdout.write("Found %d persons with information for which we don't have consent." % (persons.count(), ))

        # Narrow to persons we don't have Legitimate Interest in, and delete those fully
        persons = persons.exclude(docevent__by=F('pk'))
        persons = persons.exclude(documentauthor__person=F('pk')).exclude(dochistoryauthor__person=F('pk'))
        persons = persons.exclude(email__liaisonstatement__from_contact__person=F('pk'))
        persons = persons.exclude(email__reviewrequest__reviewer__person=F('pk'))
        persons = persons.exclude(email__shepherd_dochistory_set__shepherd__person=F('pk'))
        persons = persons.exclude(email__shepherd_document_set__shepherd__person=F('pk'))
        persons = persons.exclude(iprevent__by=F('pk'))
        persons = persons.exclude(meetingregistration__person=F('pk'))
        persons = persons.exclude(message__by=F('pk'))
        # NOTE(review): this keeps persons whose name_from_draft is the empty
        # string out of the deletion set -- confirm that this, and not the
        # opposite (non-empty name_from_draft), is what was intended.
        persons = persons.exclude(name_from_draft='')
        persons = persons.exclude(personevent__time__gt=notification_cutoff, personevent__type=event_type)
        persons = persons.exclude(reviewrequest__requested_by=F('pk'))
        persons = persons.exclude(role__person=F('pk')).exclude(rolehistory__person=F('pk'))
        persons = persons.exclude(session__requested_by=F('pk'))
        persons = persons.exclude(submissionevent__by=F('pk'))
        candidate_count = persons.count()
        self.stdout.write("Found %d persons with information for which we neither have consent nor legitimate interest." % (candidate_count, ))
        if candidate_count == 0:
            return

        self.stdout.write("Deleting records for persons for which we have with neither consent nor legitimate interest ...")
        for person in (persons if dry_run else tqdm(persons)):
            if dry_run:
                # NOTE(review): this writes an encoded byte string to
                # self.stdout; confirm this is still right if the command is
                # ever run under Python 3.
                self.stdout.write(("Would delete record #%-6d: (%s) %-32s %-48s" % (person.pk, person.time, person.ascii_name(), "<%s>"%person.email())).encode('utf8'))
                continue

            if verbosity > 1:
                # development aids
                collector = NestedObjects(using='default')
                collector.collect([person,])
                objects = collector.nested()
                related = [ o for o in objects[-1] if not isinstance(o, (Alias, Person, SearchRule, PersonalApiKey)) ]
                if len(related) > 0:
                    self.stderr.write("Person record #%-6s %s has unexpected related records" % (person.pk, person.ascii_name()))

            # Historical records using simple_history has on_delete=DO_NOTHING, so
            # we have to do explicit deletions:
            person_id = person.id
            person.delete()
            Person.history.filter(id=person_id).delete()
            Email.history.filter(person_id=person_id).delete()

    def _strip_unconsented_person_info(self, dry_run):
        """Remove consent-requiring information from the remaining persons.

        Deals with persons lacking consent that were not deleted by the
        previous pass.
        """
        # Email origins (the part before the first ':') for which the address is kept.
        kept_origins = ['author', 'role', 'reviewer', 'liaison', 'shepherd', ]

        persons = Person.objects.exclude(consent=True)
        persons = persons.exclude(id=1)
        remaining_count = persons.count()
        self.stdout.write("Found %d remaining persons with information for which we don't have consent." % (remaining_count, ))
        if remaining_count == 0:
            return

        self.stdout.write("Removing personal information requiring consent ...")
        for person in (persons if dry_run else tqdm(persons)):
            fields = ', '.join(person.needs_consent())
            if dry_run:
                self.stdout.write(("Would remove info for #%-6d: (%s) %-32s %-48s %s" % (person.pk, person.time, person.ascii_name(), "<%s>"%person.email(), fields)).encode('utf8'))
                continue

            changed = False
            if person.name_from_draft:
                log("Using name info from draft for #%-6d %s: no consent, no roles" % (person.pk, person))
                person.name = person.name_from_draft
                person.ascii = unidecode_name(person.name_from_draft)
                changed = True
            if person.biography:
                log("Deleting biography for #%-6d %s: no consent, no roles" % (person.pk, person))
                person.biography = ''
                changed = True
            if changed:
                # Persist name and biography changes alike, so that a name
                # replacement is stored even when there was no biography.
                person.save()
            if person.user_id:
                # Only touch the community lists if the referenced user row exists.
                if User.objects.filter(id=person.user_id).exists():
                    log("Deleting communitylist for #%-6d %s: no consent, no roles" % (person.pk, person))
                    person.user.communitylist_set.all().delete()
            for email in person.email_set.all():
                if email.origin.split(':')[0] not in kept_origins:
                    log("Deleting email <%s> for #%-6d %s: no consent, no roles" % (email.address, person.pk, person))
                    address = email.address
                    email.delete()
                    Email.history.filter(address=address).delete()

    def _delete_unconsented_emails_without_origin(self, dry_run):
        """Delete email records with empty origin whose person has consent=False."""
        # NOTE(review): the person passes use exclude(consent=True) while this
        # filter uses consent=False; if consent can be NULL the two differ --
        # confirm which one is intended here.
        emails = Email.objects.filter(origin='', person__consent=False)
        email_count = emails.count()
        self.stdout.write("Found %d emails without origin for which we lack consent." % (email_count, ))
        if dry_run:
            self.stdout.write("Would delete %d email records without origin and consent" % (email_count, ))
        elif email_count:
            log("Deleting %d email records without origin and consent" % (email_count, ))
            # Materialize the addresses *before* deleting.  A lazy
            # values_list() would only be evaluated inside the history query
            # below, after the email rows are gone, and would then match
            # nothing -- leaving the historical records in place.
            addresses = list(emails.values_list('address', flat=True))
            emails.delete()
            Email.history.filter(address__in=addresses).delete()
180+

0 commit comments

Comments
 (0)