(backend) add dummy content to demo documents

We need content in our demo documents so that we can test
indexing.
This commit is contained in:
Samuel Paccoud - DINUM
2025-08-06 07:32:27 +02:00
committed by Quentin BEY
parent 4dc3322b0d
commit f4bdde7e59
2 changed files with 51 additions and 33 deletions

View File

@@ -432,32 +432,35 @@ class Document(MP_Node, BaseModel):
# NOTE(review): this span comes from a flattened diff view: indentation and
# +/- markers are missing and removed/added lines appear interleaved, so it
# is not runnable as shown. Confirm against the real file before relying on it.
def save(self, *args, **kwargs):
"""Write content to object storage only if _content has changed."""
# The parent save() runs first, before any content upload is attempted.
super().save(*args, **kwargs)
# Nothing is uploaded when _content is falsy.
if self._content:
# NOTE(review): the next two assignments look like the pre-commit inline
# upload code (save_content() derives both values itself) - TODO confirm
# they are removed lines.
file_key = self.file_key
bytes_content = self._content.encode("utf-8")
# Delegate the actual upload to save_content().
self.save_content(self._content)
# NOTE(review): everything below appears to be the removed inline existence
# check that now lives in save_content() - TODO confirm.
# Attempt to directly check if the object exists using the storage client.
try:
response = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=file_key
)
except ClientError as excpt:
# If the error is a 404, the object doesn't exist, so we should create it.
if excpt.response["Error"]["Code"] == "404":
has_changed = True
else:
raise
# NOTE(review): this span comes from a flattened diff view: indentation and
# +/- markers are missing and removed/added lines appear interleaved, so it
# is not runnable as shown. Confirm against the real file before relying on it.
def save_content(self, content):
"""
Save content to object storage.

The content is encoded as UTF-8 and written under self.file_key, unless an
object already stored under that key has an ETag equal to the MD5 hex digest
of the encoded content. A ClientError whose code is not "404" is re-raised.
"""
file_key = self.file_key
bytes_content = content.encode("utf-8")
# Attempt to directly check if the object exists using the storage client.
try:
response = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=file_key
)
except ClientError as excpt:
# If the error is a 404, the object doesn't exist, so we should create it.
if excpt.response["Error"]["Code"] == "404":
has_changed = True
else:
# NOTE(review): the comparison right below appears to be the earlier
# multi-line formatting of the ETag check, interleaved here by the diff
# view; the `raise` that follows it is presumably the real body of this
# `else` - TODO confirm.
# Compare the existing ETag with the MD5 hash of the new content.
has_changed = (
response["ETag"].strip('"')
!= hashlib.md5(bytes_content).hexdigest() # noqa: S324
)
raise
else:
# Reached only when head_object succeeded, i.e. an object already exists.
# NOTE(review): this assumes the stored ETag is a plain MD5 of the object
# body; verify that holds for the storage backend and upload mode in use.
# Compare the existing ETag with the MD5 hash of the new content.
has_changed = (
response["ETag"].strip('"') != hashlib.md5(bytes_content).hexdigest() # noqa: S324
)
# NOTE(review): the upload step is listed twice below; one copy is presumably
# the removed (more deeply indented) version - TODO confirm.
if has_changed:
content_file = ContentFile(bytes_content)
default_storage.save(file_key, content_file)
# Upload only when the object is missing or its content differs.
if has_changed:
content_file = ContentFile(bytes_content)
default_storage.save(file_key, content_file)
def is_leaf(self):
"""

View File

@@ -1,16 +1,19 @@
# ruff: noqa: S311, S106
"""create_demo management command"""
import base64
import logging
import math
import random
import time
from collections import defaultdict
from uuid import uuid4
from django import db
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
import pycrdt
from faker import Faker
from core import models
@@ -28,6 +31,16 @@ def random_true_with_probability(probability):
return random.random() < probability
def get_ydoc_for_text(text):
    """
    Build a demo ydoc holding the given plain text and serialize it.

    The text is wrapped in a single "p" XML element, stored in a fragment
    under the "document-store" key of a new document. The document update is
    returned as a base64-encoded string.
    """
    doc = pycrdt.Doc()
    doc["document-store"] = pycrdt.XmlFragment(
        [pycrdt.XmlElement("p", {}, [pycrdt.XmlText(text)])]
    )
    encoded = base64.b64encode(doc.get_update())
    return encoded.decode("utf-8")
class BulkQueue:
"""A utility class to create Django model instances in bulk by just pushing to a queue."""
@@ -49,7 +62,7 @@ class BulkQueue:
self.queue[objects[0]._meta.model.__name__] = [] # noqa: SLF001
def push(self, obj):
"""Add a model instance to queue to that it gets created in bulk."""
"""Add a model instance to queue so that it gets created in bulk."""
objects = self.queue[obj._meta.model.__name__] # noqa: SLF001
objects.append(obj)
if len(objects) > self.BATCH_SIZE:
@@ -140,17 +153,19 @@ def create_demo(stdout):
# pylint: disable=protected-access
key = models.Document._int2str(i) # noqa: SLF001
padding = models.Document.alphabet[0] * (models.Document.steplen - len(key))
queue.push(
models.Document(
depth=1,
path=f"{padding}{key}",
creator_id=random.choice(users_ids),
title=fake.sentence(nb_words=4),
link_reach=models.LinkReachChoices.AUTHENTICATED
if random_true_with_probability(0.5)
else random.choice(models.LinkReachChoices.values),
)
title = fake.sentence(nb_words=4)
document = models.Document(
id=uuid4(),
depth=1,
path=f"{padding}{key}",
creator_id=random.choice(users_ids),
title=title,
link_reach=models.LinkReachChoices.AUTHENTICATED
if random_true_with_probability(0.5)
else random.choice(models.LinkReachChoices.values),
)
document.save_content(get_ydoc_for_text(f"Content for {title:s}"))
queue.push(document)
queue.flush()