🐛(db) fix users duplicate

Some OIDC identity providers provide a random
value in the "sub" field instead of an
identifying ID.
It created duplicate users in the database.
This migration fixes the issue by removing the
duplicate users after having updated all
the references to the old users.
This commit is contained in:
Anthony LC
2024-10-10 13:57:36 +02:00
committed by Anthony LC
parent 39d0211593
commit 0d5b2382ab
2 changed files with 132 additions and 0 deletions

View File

@@ -9,6 +9,10 @@ and this project adheres to
## [Unreleased]
## Fixed
- 🐛(db) fix users duplicate #316
## [1.5.0] - 2024-10-09

View File

@@ -0,0 +1,128 @@
# Generated by Django 5.1.1 on 2024-10-10 11:45
from django.db import migrations
procedure = """
DO $$
DECLARE
user_email TEXT;
BEGIN
-- Step 1: Create a temporary table (without the unique constraint)
-- impress_document_access
DROP TABLE IF EXISTS impress_document_access_tmp;
CREATE TEMP TABLE impress_document_access_tmp AS
SELECT * FROM impress_document_access;
-- impress_link_trace
DROP TABLE IF EXISTS impress_link_trace_tmp;
CREATE TEMP TABLE impress_link_trace_tmp AS
SELECT * FROM impress_link_trace;
-- Step 2: Loop through each email that appears more than once
FOR user_email IN
SELECT email
FROM impress_user
GROUP BY email
HAVING COUNT(email) > 1
LOOP
-- Step 3: Update user_id in the temporary table based on email
-- For impress_document_access
UPDATE impress_document_access_tmp
SET user_id = (
SELECT id
FROM impress_user
WHERE email = user_email
LIMIT 1
)
WHERE user_id IN (
SELECT id
FROM impress_user
WHERE email = user_email
);
-- For impress_link_trace
UPDATE impress_link_trace_tmp
SET user_id = (
SELECT id
FROM impress_user
WHERE email = user_email
LIMIT 1
)
WHERE user_id IN (
SELECT id
FROM impress_user
WHERE email = user_email
);
-- update impress_invitation
UPDATE impress_invitation
SET issuer_id = (
SELECT id
FROM impress_user
WHERE email = user_email
LIMIT 1
)
WHERE issuer_id IN (
SELECT id
FROM impress_user
WHERE email = user_email
);
DELETE FROM impress_user
WHERE id IN (
SELECT id
FROM impress_user
WHERE email = user_email
)
AND id != (
SELECT id
FROM impress_user
WHERE email = user_email
LIMIT 1
);
RAISE NOTICE 'Processed updates for email: %', user_email;
END LOOP;
-- Step 4: Remove duplicate rows from the temporary table, keeping only one row per (document_id, user_id)
-- For impress_document_access
DELETE FROM impress_document_access_tmp a
USING impress_document_access_tmp b
WHERE a.ctid < b.ctid -- Keep one row
AND a.document_id = b.document_id
AND a.user_id = b.user_id;
-- Step 5: Replace the original table with the cleaned-up temporary table
TRUNCATE TABLE impress_document_access;
-- Insert cleaned-up data back into the original table
INSERT INTO impress_document_access
SELECT * FROM impress_document_access_tmp;
-- For impress_link_trace
DELETE FROM impress_link_trace_tmp a
USING impress_link_trace_tmp b
WHERE a.ctid < b.ctid -- Keep one row
AND a.document_id = b.document_id
AND a.user_id = b.user_id;
-- Step 5: Replace the original table with the cleaned-up temporary table
TRUNCATE TABLE impress_link_trace;
-- Insert cleaned-up data back into the original table
INSERT INTO impress_link_trace
SELECT * FROM impress_link_trace_tmp;
RAISE NOTICE 'Update and deduplication process completed.';
END $$;
"""
class Migration(migrations.Migration):
dependencies = [
('core', '0006_add_user_full_name_and_short_name'),
]
operations = [
migrations.RunSQL(procedure),
]