diff --git a/CHANGELOG.md b/CHANGELOG.md index 72ad5757..8eb3b458 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to ## [Unreleased] +## Fixed + +- 🐛(db) fix users duplicate #316 + ## [1.5.0] - 2024-10-09 diff --git a/src/backend/core/migrations/0007_fix_users_duplicate.py b/src/backend/core/migrations/0007_fix_users_duplicate.py new file mode 100644 index 00000000..50139299 --- /dev/null +++ b/src/backend/core/migrations/0007_fix_users_duplicate.py @@ -0,0 +1,128 @@ +# Generated by Django 5.1.1 on 2024-10-10 11:45 + +from django.db import migrations + +procedure = """ +DO $$ +DECLARE + user_email TEXT; +BEGIN + -- Step 1: Create a temporary table (without the unique constraint) + -- impress_document_access + DROP TABLE IF EXISTS impress_document_access_tmp; + CREATE TEMP TABLE impress_document_access_tmp AS + SELECT * FROM impress_document_access; + + -- impress_link_trace + DROP TABLE IF EXISTS impress_link_trace_tmp; + CREATE TEMP TABLE impress_link_trace_tmp AS + SELECT * FROM impress_link_trace; + + -- Step 2: Loop through each email that appears more than once + FOR user_email IN + SELECT email + FROM impress_user + GROUP BY email + HAVING COUNT(email) > 1 + LOOP + -- Step 3: Update user_id in the temporary table based on email + -- For impress_document_access + UPDATE impress_document_access_tmp + SET user_id = ( + SELECT id + FROM impress_user + WHERE email = user_email + LIMIT 1 + ) + WHERE user_id IN ( + SELECT id + FROM impress_user + WHERE email = user_email + ); + + -- For impress_link_trace + UPDATE impress_link_trace_tmp + SET user_id = ( + SELECT id + FROM impress_user + WHERE email = user_email + LIMIT 1 + ) + WHERE user_id IN ( + SELECT id + FROM impress_user + WHERE email = user_email + ); + + -- update impress_invitation + UPDATE impress_invitation + SET issuer_id = ( + SELECT id + FROM impress_user + WHERE email = user_email + LIMIT 1 + ) + WHERE issuer_id IN ( + SELECT id + FROM impress_user + WHERE email = user_email + ); + + DELETE FROM impress_user + WHERE id IN ( + SELECT id + FROM impress_user + WHERE email = user_email + ) + AND id != ( + SELECT id + FROM impress_user + WHERE email = user_email + LIMIT 1 + ); + + RAISE NOTICE 'Processed updates for email: %', user_email; + END LOOP; + + -- Step 4: Remove duplicate rows from the temporary table, keeping only one row per (document_id, user_id) + -- For impress_document_access + DELETE FROM impress_document_access_tmp a + USING impress_document_access_tmp b + WHERE a.ctid < b.ctid -- Keep one row + AND a.document_id = b.document_id + AND a.user_id = b.user_id; + + -- Step 5: Replace the original table with the cleaned-up temporary table + TRUNCATE TABLE impress_document_access; + + -- Insert cleaned-up data back into the original table + INSERT INTO impress_document_access + SELECT * FROM impress_document_access_tmp; + + -- For impress_link_trace + DELETE FROM impress_link_trace_tmp a + USING impress_link_trace_tmp b + WHERE a.ctid < b.ctid -- Keep one row + AND a.document_id = b.document_id + AND a.user_id = b.user_id; + + -- Step 5: Replace the original table with the cleaned-up temporary table + TRUNCATE TABLE impress_link_trace; + + -- Insert cleaned-up data back into the original table + INSERT INTO impress_link_trace + SELECT * FROM impress_link_trace_tmp; + + RAISE NOTICE 'Update and deduplication process completed.'; +END $$; +""" + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0006_add_user_full_name_and_short_name'), + ] + + operations = [ + migrations.RunSQL(procedure), + ]