(backend) add dummy content to demo documents

We need content in our demo documents so that we can test
indexing.
This commit is contained in:
Samuel Paccoud - DINUM
2025-08-06 07:32:27 +02:00
committed by Quentin BEY
parent 4dc3322b0d
commit f4bdde7e59
2 changed files with 51 additions and 33 deletions

View File

@@ -432,32 +432,35 @@ class Document(MP_Node, BaseModel):
def save(self, *args, **kwargs): def save(self, *args, **kwargs):
"""Write content to object storage only if _content has changed.""" """Write content to object storage only if _content has changed."""
super().save(*args, **kwargs) super().save(*args, **kwargs)
if self._content: if self._content:
file_key = self.file_key self.save_content(self._content)
bytes_content = self._content.encode("utf-8")
# Attempt to directly check if the object exists using the storage client. def save_content(self, content):
try: """Save content to object storage."""
response = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=file_key file_key = self.file_key
) bytes_content = content.encode("utf-8")
except ClientError as excpt:
# If the error is a 404, the object doesn't exist, so we should create it. # Attempt to directly check if the object exists using the storage client.
if excpt.response["Error"]["Code"] == "404": try:
has_changed = True response = default_storage.connection.meta.client.head_object(
else: Bucket=default_storage.bucket_name, Key=file_key
raise )
except ClientError as excpt:
# If the error is a 404, the object doesn't exist, so we should create it.
if excpt.response["Error"]["Code"] == "404":
has_changed = True
else: else:
# Compare the existing ETag with the MD5 hash of the new content. raise
has_changed = ( else:
response["ETag"].strip('"') # Compare the existing ETag with the MD5 hash of the new content.
!= hashlib.md5(bytes_content).hexdigest() # noqa: S324 has_changed = (
) response["ETag"].strip('"') != hashlib.md5(bytes_content).hexdigest() # noqa: S324
)
if has_changed: if has_changed:
content_file = ContentFile(bytes_content) content_file = ContentFile(bytes_content)
default_storage.save(file_key, content_file) default_storage.save(file_key, content_file)
def is_leaf(self): def is_leaf(self):
""" """

View File

@@ -1,16 +1,19 @@
# ruff: noqa: S311, S106 # ruff: noqa: S311, S106
"""create_demo management command""" """create_demo management command"""
import base64
import logging import logging
import math import math
import random import random
import time import time
from collections import defaultdict from collections import defaultdict
from uuid import uuid4
from django import db from django import db
from django.conf import settings from django.conf import settings
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
import pycrdt
from faker import Faker from faker import Faker
from core import models from core import models
@@ -28,6 +31,16 @@ def random_true_with_probability(probability):
return random.random() < probability return random.random() < probability
def get_ydoc_for_text(text):
"""Return a ydoc from plain text for demo purposes."""
ydoc = pycrdt.Doc()
paragraph = pycrdt.XmlElement("p", {}, [pycrdt.XmlText(text)])
fragment = pycrdt.XmlFragment([paragraph])
ydoc["document-store"] = fragment
update = ydoc.get_update()
return base64.b64encode(update).decode("utf-8")
class BulkQueue: class BulkQueue:
"""A utility class to create Django model instances in bulk by just pushing to a queue.""" """A utility class to create Django model instances in bulk by just pushing to a queue."""
@@ -49,7 +62,7 @@ class BulkQueue:
self.queue[objects[0]._meta.model.__name__] = [] # noqa: SLF001 self.queue[objects[0]._meta.model.__name__] = [] # noqa: SLF001
def push(self, obj): def push(self, obj):
"""Add a model instance to queue to that it gets created in bulk.""" """Add a model instance to queue so that it gets created in bulk."""
objects = self.queue[obj._meta.model.__name__] # noqa: SLF001 objects = self.queue[obj._meta.model.__name__] # noqa: SLF001
objects.append(obj) objects.append(obj)
if len(objects) > self.BATCH_SIZE: if len(objects) > self.BATCH_SIZE:
@@ -140,17 +153,19 @@ def create_demo(stdout):
# pylint: disable=protected-access # pylint: disable=protected-access
key = models.Document._int2str(i) # noqa: SLF001 key = models.Document._int2str(i) # noqa: SLF001
padding = models.Document.alphabet[0] * (models.Document.steplen - len(key)) padding = models.Document.alphabet[0] * (models.Document.steplen - len(key))
queue.push( title = fake.sentence(nb_words=4)
models.Document( document = models.Document(
depth=1, id=uuid4(),
path=f"{padding}{key}", depth=1,
creator_id=random.choice(users_ids), path=f"{padding}{key}",
title=fake.sentence(nb_words=4), creator_id=random.choice(users_ids),
link_reach=models.LinkReachChoices.AUTHENTICATED title=title,
if random_true_with_probability(0.5) link_reach=models.LinkReachChoices.AUTHENTICATED
else random.choice(models.LinkReachChoices.values), if random_true_with_probability(0.5)
) else random.choice(models.LinkReachChoices.values),
) )
document.save_content(get_ydoc_for_text(f"Content for {title:s}"))
queue.push(document)
queue.flush() queue.flush()