From 7278613b20c446b6d35a053192e91fc1f03b61c2 Mon Sep 17 00:00:00 2001 From: lebaudantoine Date: Mon, 11 Nov 2024 19:00:52 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=97=83=EF=B8=8F(backend)=20merge=20duplic?= =?UTF-8?q?ate=20user=20accounts=20on=20email?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Write the proper ORM code to sanitize the rows in db and avoid existing users lose access to our app. Existing duplicate user accounts are merged, and resource accesses are transferred. --- .../migrations/0006_merge_duplicate_users.py | 89 +++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 src/backend/core/migrations/0006_merge_duplicate_users.py diff --git a/src/backend/core/migrations/0006_merge_duplicate_users.py b/src/backend/core/migrations/0006_merge_duplicate_users.py new file mode 100644 index 00000000..fd1217a3 --- /dev/null +++ b/src/backend/core/migrations/0006_merge_duplicate_users.py @@ -0,0 +1,89 @@ + +from django.db import migrations +from django.db.models import Count +from core.models import RoleChoices + +def merge_duplicate_user_accounts(apps, schema_editor): + """Merge user accounts that share the same email address. + + Historical Context: + Previously, ProConnect authentication could return users with the same email + but different sub, leading to duplicate user accounts. While the application + now prevents this scenario, this migration is needed to clean up existing + duplicate accounts to ensure users can continue to connect without being blocked + by unique email constraints. + + Performance of this migration is poor, this implementation prioritizes readability + and maintainability. Consider refactoring this code to avoid individual db queries + on each iteration. + """ + + User = apps.get_model('core', 'User') + ResourceAccess = apps.get_model('core', 'ResourceAccess') + + emails_with_duplicates = ( + User.objects.values('email') + .annotate(count=Count('id')) + .filter(count__gt=1) + .values_list('email', flat=True) + ) + + for email in emails_with_duplicates: + # Keep the oldest user + primary_user = User.objects.filter(email=email).order_by('created_at').first() + duplicate_user_accounts = User.objects.filter(email=email).exclude(id=primary_user.id) + + # Get IDs of duplicate accounts to be merged + duplicate_account_ids = list(duplicate_user_accounts.values_list('id', flat=True)) + resource_accesses_to_transfer = ResourceAccess.objects.filter(user_id__in=duplicate_account_ids) + + # Transfer resource access permissions to primary user + # This process handles role hierarchy where: + # OWNER > ADMIN > MEMBER + for resource_access in resource_accesses_to_transfer: + + # Determine if primary user already has access to this resource + existing_primary_access = ResourceAccess.objects.filter( + user_id=primary_user.id, + resource_id=resource_access.resource.id + ).first() + + if existing_primary_access: + # Skip if primary user is already OWNER as it's the highest privilege level + # No need to modify or downgrade owner access + if existing_primary_access.role == RoleChoices.OWNER: + continue + + # Skip if primary user already has the exact same role + # No need to update when roles match + elif existing_primary_access.role == resource_access.role: + continue + + # Skip if new role is MEMBER since user already has base access + # All existing access includes at least MEMBER privileges + elif resource_access.role == RoleChoices.MEMBER: + continue + + # Update the role only if it represents a higher privilege level + # Preserves existing access record while updating the role + existing_primary_access.role = resource_access.role + existing_primary_access.save() + else: + # Transfer access to primary user + resource_access.user_id = primary_user.id + resource_access.save() + + # Delete duplicate accounts - CASCADE will automatically remove any untransferred + # ResourceAccess records and other related data for these users + duplicate_user_accounts.delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0005_recording_recordingaccess_and_more'), + ] + + operations = [ + migrations.RunPython(merge_duplicate_user_accounts, reverse_code=migrations.RunPython.noop), + ]