From 0d5b2382abb9cdbef3987304938a4076da7e463c Mon Sep 17 00:00:00 2001
From: Anthony LC
Date: Thu, 10 Oct 2024 13:57:36 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B(db)=20fix=20users=20duplicate?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some OIDC identity providers provide a random value in the "sub"
field instead of an identifying ID. It created duplicate users in the
database.
This migration fixes the issue by removing the duplicate users after
having updated all the references to the old users.
---
 CHANGELOG.md                                  |   4 +
 .../migrations/0007_fix_users_duplicate.py    | 123 ++++++++++++++++++
 2 files changed, 127 insertions(+)
 create mode 100644 src/backend/core/migrations/0007_fix_users_duplicate.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 72ad5757..8eb3b458 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,10 @@ and this project adheres to
 
 ## [Unreleased]
 
+## Fixed
+
+- 🐛(db) fix users duplicate #316
+
 ## [1.5.0] - 2024-10-09
 
 
diff --git a/src/backend/core/migrations/0007_fix_users_duplicate.py b/src/backend/core/migrations/0007_fix_users_duplicate.py
new file mode 100644
index 00000000..50139299
--- /dev/null
+++ b/src/backend/core/migrations/0007_fix_users_duplicate.py
@@ -0,0 +1,123 @@
+# Generated by Django 5.1.1 on 2024-10-10 11:45
+
+from django.db import migrations
+
+# PostgreSQL-only PL/pgSQL block that merges users sharing the same email.
+# For each duplicated email it keeps one user, re-points document accesses,
+# link traces and invitations to that user, then deletes the other users.
+procedure = """
+DO $$
+DECLARE
+    user_email TEXT;
+    kept_user_id impress_user.id%TYPE;
+BEGIN
+    -- Step 1: Create a temporary table (without the unique constraint)
+    -- impress_document_access
+    DROP TABLE IF EXISTS impress_document_access_tmp;
+    CREATE TEMP TABLE impress_document_access_tmp AS
+    SELECT * FROM impress_document_access;
+
+    -- impress_link_trace
+    DROP TABLE IF EXISTS impress_link_trace_tmp;
+    CREATE TEMP TABLE impress_link_trace_tmp AS
+    SELECT * FROM impress_link_trace;
+
+    -- Step 2: Loop through each email that appears more than once
+    FOR user_email IN
+        SELECT email
+        FROM impress_user
+        GROUP BY email
+        HAVING COUNT(email) > 1
+    LOOP
+        -- Step 3: Choose the surviving user once per email.
+        -- LIMIT without ORDER BY returns an unspecified row, so the id is
+        -- resolved a single time (ordered by id) and reused below; every
+        -- statement is then guaranteed to agree on which user is kept.
+        SELECT id INTO kept_user_id
+        FROM impress_user
+        WHERE email = user_email
+        ORDER BY id
+        LIMIT 1;
+
+        -- Step 4: Update user_id in the temporary table based on email
+        -- For impress_document_access
+        UPDATE impress_document_access_tmp
+        SET user_id = kept_user_id
+        WHERE user_id IN (
+            SELECT id
+            FROM impress_user
+            WHERE email = user_email
+        );
+
+        -- For impress_link_trace
+        UPDATE impress_link_trace_tmp
+        SET user_id = kept_user_id
+        WHERE user_id IN (
+            SELECT id
+            FROM impress_user
+            WHERE email = user_email
+        );
+
+        -- update impress_invitation
+        UPDATE impress_invitation
+        SET issuer_id = kept_user_id
+        WHERE issuer_id IN (
+            SELECT id
+            FROM impress_user
+            WHERE email = user_email
+        );
+
+        -- Step 5: Delete every duplicate except the surviving user
+        DELETE FROM impress_user
+        WHERE email = user_email
+        AND id != kept_user_id;
+
+        RAISE NOTICE 'Processed updates for email: %', user_email;
+    END LOOP;
+
+    -- Step 6: Remove duplicate rows from the temporary table, keeping only one row per (document_id, user_id)
+    -- For impress_document_access
+    DELETE FROM impress_document_access_tmp a
+    USING impress_document_access_tmp b
+    WHERE a.ctid < b.ctid  -- Keep one row
+    AND a.document_id = b.document_id
+    AND a.user_id = b.user_id;
+
+    -- Step 7: Replace the original table with the cleaned-up temporary table
+    TRUNCATE TABLE impress_document_access;
+
+    -- Insert cleaned-up data back into the original table
+    INSERT INTO impress_document_access
+    SELECT * FROM impress_document_access_tmp;
+
+    -- For impress_link_trace
+    DELETE FROM impress_link_trace_tmp a
+    USING impress_link_trace_tmp b
+    WHERE a.ctid < b.ctid  -- Keep one row
+    AND a.document_id = b.document_id
+    AND a.user_id = b.user_id;
+
+    -- Step 7: Replace the original table with the cleaned-up temporary table
+    TRUNCATE TABLE impress_link_trace;
+
+    -- Insert cleaned-up data back into the original table
+    INSERT INTO impress_link_trace
+    SELECT * FROM impress_link_trace_tmp;
+
+    RAISE NOTICE 'Update and deduplication process completed.';
+END $$;
+"""
+
+
+class Migration(migrations.Migration):
+    """Remove duplicate users created by unstable OIDC "sub" values."""
+
+    dependencies = [
+        ('core', '0006_add_user_full_name_and_short_name'),
+    ]
+
+    operations = [
+        # The merge deletes rows and cannot be undone; a no-op reverse keeps
+        # the migration graph reversible past this point.
+        migrations.RunSQL(procedure, reverse_sql=migrations.RunSQL.noop),
+    ]