From 053c4a40e96d57a5c728108f79d148693f282afd Mon Sep 17 00:00:00 2001 From: lebaudantoine Date: Wed, 4 Sep 2024 13:51:02 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=A9=B9(backend)=20fix=20identity=20hash?= =?UTF-8?q?=20randomness?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'hash' built-in function is randomly seeded per Python process. In staging or production, our backend runs over 3 pods, thus 3 Python processes. For a given identity, it was not producing the same hash across all pods. Why is 'hash' randomly seeded? For security reasons: a vulnerability disclosure showed that key collisions could be exploited. Since Python 3.3, 'hash' is randomly seeded by default. Fixed it! Thx @jonathanperret for your help. --- src/backend/core/utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/backend/core/utils.py b/src/backend/core/utils.py index a4c67792..304b5de7 100644 --- a/src/backend/core/utils.py +++ b/src/backend/core/utils.py @@ -4,6 +4,7 @@ Utils functions used in the core app # ruff: noqa:S311 +import hashlib import json import random from typing import Optional @@ -24,7 +25,11 @@ def generate_color(identity: str) -> str: range and ensure predictability. """ - random.seed(hash(identity)) + # ruff: noqa:S324 + identity_hash = hashlib.sha1(identity.encode("utf-8")) + # Keep only hash's last 16 bits, collisions are not a concern + seed = int(identity_hash.hexdigest(), 16) & 0xFFFF + random.seed(seed) hue = random.randint(0, 360) saturation = random.randint(50, 75) lightness = random.randint(25, 60)