(summary) enhance transcription document naming with room context

Add optional room name, recording date, and recording time so that
generated document names are more descriptive, as requested in user
feedback. The title template is customizable to support internationalization.
This commit is contained in:
lebaudantoine
2025-07-11 15:24:14 +02:00
committed by aleb_the_flash
parent 16dde229cc
commit 6c4c44e933
4 changed files with 45 additions and 5 deletions

View File

@@ -136,6 +136,13 @@ class NotificationService:
"filename": recording.key, "filename": recording.key,
"email": owner_access.user.email, "email": owner_access.user.email,
"sub": owner_access.user.sub, "sub": owner_access.user.sub,
"room": recording.room.name,
"recording_date": recording.created_at.astimezone(
owner_access.user.timezone
).strftime("%Y-%m-%d"),
"recording_time": recording.created_at.astimezone(
owner_access.user.timezone
).strftime("%H:%M"),
} }
headers = { headers = {

View File

@@ -20,6 +20,9 @@ class TaskCreation(BaseModel):
email: str email: str
sub: str sub: str
version: Optional[int] = 2 version: Optional[int] = 2
room: Optional[str]
recording_date: Optional[str]
recording_time: Optional[str]
router = APIRouter(prefix="/tasks") router = APIRouter(prefix="/tasks")
@@ -34,7 +37,13 @@ async def create_task(request: TaskCreation):
) )
else: else:
task = process_audio_transcribe_summarize_v2.delay( task = process_audio_transcribe_summarize_v2.delay(
request.filename, request.email, request.sub, time.time() request.filename,
request.email,
request.sub,
time.time(),
request.room,
request.recording_date,
request.recording_time,
) )
return {"id": task.id, "message": "Task created"} return {"id": task.id, "message": "Task created"}

View File

@@ -1,10 +1,13 @@
"""Celery workers.""" """Celery workers."""
# ruff: noqa: PLR0913
import json import json
import os import os
import tempfile import tempfile
import time import time
from pathlib import Path from pathlib import Path
from typing import Optional
import openai import openai
import sentry_sdk import sentry_sdk
@@ -233,7 +236,14 @@ def process_audio_transcribe_summarize(filename: str, email: str, sub: str):
max_retries=settings.celery_max_retries, max_retries=settings.celery_max_retries,
) )
def process_audio_transcribe_summarize_v2( def process_audio_transcribe_summarize_v2(
self, filename: str, email: str, sub: str, received_at: float self,
filename: str,
email: str,
sub: str,
received_at: float,
room: Optional[str],
recording_date: Optional[str],
recording_time: Optional[str],
): ):
"""Process an audio file by transcribing it and generating a summary. """Process an audio file by transcribing it and generating a summary.
@@ -269,7 +279,10 @@ def process_audio_transcribe_summarize_v2(
audio_file = File(temp_file_path) audio_file = File(temp_file_path)
metadata_manager.track(task_id, {"audio_length": audio_file.info.length}) metadata_manager.track(task_id, {"audio_length": audio_file.info.length})
if settings.recording_max_duration is not None and audio_file.info.length > settings.recording_max_duration: if (
settings.recording_max_duration is not None
and audio_file.info.length > settings.recording_max_duration
):
error_msg = "Recording too long: %.2fs > %.2fs limit" % ( error_msg = "Recording too long: %.2fs > %.2fs limit" % (
audio_file.info.length, audio_file.info.length,
settings.recording_max_duration, settings.recording_max_duration,
@@ -314,8 +327,16 @@ def process_audio_transcribe_summarize_v2(
metadata_manager.track_transcription_metadata(task_id, transcription) metadata_manager.track_transcription_metadata(task_id, transcription)
if not room or not recording_date or not recording_time:
title = settings.document_default_title
else:
title = settings.document_title_template.format(
room=room,
room_recording_date=recording_date,
room_recording_time=recording_time,
)
data = { data = {
"title": settings.document_title, "title": title,
"content": formatted_transcription, "content": formatted_transcription,
"email": email, "email": email,
"sub": sub, "sub": sub,

View File

@@ -46,7 +46,10 @@ class Settings(BaseSettings):
webhook_url: str webhook_url: str
# Output related settings # Output related settings
document_title: Optional[str] = "Transcription" document_default_title: Optional[str] = "Transcription"
document_title_template: Optional[str] = (
'Réunion "{room}" du {room_recording_date} à {room_recording_time}'
)
# Sentry # Sentry
sentry_is_enabled: bool = False sentry_is_enabled: bool = False