From 6c4c44e933b258789007d3e3e8521253e0d98cc4 Mon Sep 17 00:00:00 2001 From: lebaudantoine Date: Fri, 11 Jul 2025 15:24:14 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8(summary)=20enhance=20transcription=20?= =?UTF-8?q?document=20naming=20with=20room=20context?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add optional room name, recording time and date to generate better document names based on user feedback. Template is customizable for internationalization support. --- .../core/recording/event/notification.py | 7 +++++ src/summary/summary/api/route/tasks.py | 11 +++++++- src/summary/summary/core/celery_worker.py | 27 ++++++++++++++++--- src/summary/summary/core/config.py | 5 +++- 4 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/backend/core/recording/event/notification.py b/src/backend/core/recording/event/notification.py index b441a543..49f0d02e 100644 --- a/src/backend/core/recording/event/notification.py +++ b/src/backend/core/recording/event/notification.py @@ -136,6 +136,13 @@ class NotificationService: "filename": recording.key, "email": owner_access.user.email, "sub": owner_access.user.sub, + "room": recording.room.name, + "recording_date": recording.created_at.astimezone( + owner_access.user.timezone + ).strftime("%Y-%m-%d"), + "recording_time": recording.created_at.astimezone( + owner_access.user.timezone + ).strftime("%H:%M"), } headers = { diff --git a/src/summary/summary/api/route/tasks.py b/src/summary/summary/api/route/tasks.py index 56ba8608..7fdd0cdc 100644 --- a/src/summary/summary/api/route/tasks.py +++ b/src/summary/summary/api/route/tasks.py @@ -20,6 +20,9 @@ class TaskCreation(BaseModel): email: str sub: str version: Optional[int] = 2 + room: Optional[str] + recording_date: Optional[str] + recording_time: Optional[str] router = APIRouter(prefix="/tasks") @@ -34,7 +37,13 @@ async def create_task(request: TaskCreation): ) else: task = process_audio_transcribe_summarize_v2.delay( - request.filename, request.email, request.sub, time.time() + request.filename, + request.email, + request.sub, + time.time(), + request.room, + request.recording_date, + request.recording_time, ) return {"id": task.id, "message": "Task created"} diff --git a/src/summary/summary/core/celery_worker.py b/src/summary/summary/core/celery_worker.py index 61a1490a..58ed0dce 100644 --- a/src/summary/summary/core/celery_worker.py +++ b/src/summary/summary/core/celery_worker.py @@ -1,10 +1,13 @@ """Celery workers.""" +# ruff: noqa: PLR0913 + import json import os import tempfile import time from pathlib import Path +from typing import Optional import openai import sentry_sdk @@ -233,7 +236,14 @@ def process_audio_transcribe_summarize(filename: str, email: str, sub: str): max_retries=settings.celery_max_retries, ) def process_audio_transcribe_summarize_v2( - self, filename: str, email: str, sub: str, received_at: float + self, + filename: str, + email: str, + sub: str, + received_at: float, + room: Optional[str], + recording_date: Optional[str], + recording_time: Optional[str], ): """Process an audio file by transcribing it and generating a summary. @@ -269,7 +279,10 @@ def process_audio_transcribe_summarize_v2( audio_file = File(temp_file_path) metadata_manager.track(task_id, {"audio_length": audio_file.info.length}) - if settings.recording_max_duration is not None and audio_file.info.length > settings.recording_max_duration: + if ( + settings.recording_max_duration is not None + and audio_file.info.length > settings.recording_max_duration + ): error_msg = "Recording too long: %.2fs > %.2fs limit" % ( audio_file.info.length, settings.recording_max_duration, @@ -314,8 +327,16 @@ def process_audio_transcribe_summarize_v2( metadata_manager.track_transcription_metadata(task_id, transcription) + if not room or not recording_date or not recording_time: + title = settings.document_default_title + else: + title = settings.document_title_template.format( + room=room, + room_recording_date=recording_date, + room_recording_time=recording_time, + ) data = { - "title": settings.document_title, + "title": title, "content": formatted_transcription, "email": email, "sub": sub, diff --git a/src/summary/summary/core/config.py b/src/summary/summary/core/config.py index 7341cf95..6ef1055d 100644 --- a/src/summary/summary/core/config.py +++ b/src/summary/summary/core/config.py @@ -46,7 +46,10 @@ class Settings(BaseSettings): webhook_url: str # Output related settings - document_title: Optional[str] = "Transcription" + document_default_title: Optional[str] = "Transcription" + document_title_template: Optional[str] = ( + 'Réunion "{room}" du {room_recording_date} à {room_recording_time}' + ) # Sentry sentry_is_enabled: bool = False