(summary) link transcripts to their downloadable recordings

Link the transcription document to its related recording by adding a short
header explaining that users can download the audio file via a dedicated link.

This was a highly requested feature, as many users need to keep their audio
files.

As part of a small refactor, remove the argument length check in the metadata
analytics class. The hardcoded argument count made the code harder to evolve and
was easy to forget to update. Argument unwrapping remains fragile and should be
redesigned later to be more robust.

The backend is responsible for generating the download link to ensure
consistency and reliability.

I tried adding a divider, but the Markdown-to-Yjs conversion is very lossy and
almost never handles it correctly. Only about one out of ten conversions works
as expected.
This commit is contained in:
lebaudantoine
2026-01-03 01:12:09 +01:00
committed by aleb_the_flash
parent f7b45622bc
commit 39271544d7
10 changed files with 45 additions and 10 deletions

View File

@@ -27,6 +27,7 @@ class TaskCreation(BaseModel):
recording_date: Optional[str]
recording_time: Optional[str]
language: Optional[str]
download_link: Optional[str]
@field_validator("language")
@classmethod
@@ -57,6 +58,7 @@ async def create_task(request: TaskCreation):
request.recording_date,
request.recording_time,
request.language,
request.download_link,
],
queue=settings.transcribe_queue,
)

View File

@@ -118,11 +118,6 @@ class MetadataManager:
"retries": 0,
}
_required_args_count = 9
if len(task_args) != _required_args_count:
logger.error("Invalid number of arguments to enable metadata manager.")
return
_, filename, email, _, received_at, *_ = task_args
initial_metadata = {

View File

@@ -120,6 +120,7 @@ def process_audio_transcribe_summarize_v2(
recording_date: Optional[str],
recording_time: Optional[str],
language: Optional[str],
download_link: Optional[str],
):
"""Process an audio file by transcribing it and generating a summary.
@@ -184,6 +185,7 @@ def process_audio_transcribe_summarize_v2(
room=room,
recording_date=recording_date,
recording_time=recording_time,
download_link=download_link,
)
data = {

View File

@@ -65,6 +65,7 @@ class TranscriptFormatter:
room: Optional[str] = None,
recording_date: Optional[str] = None,
recording_time: Optional[str] = None,
download_link: Optional[str] = None,
) -> Tuple[str, str]:
"""Format transcription into the final document and its title."""
segments = self._get_segments(transcription)
@@ -74,6 +75,7 @@ class TranscriptFormatter:
else:
content = self._format_speaker(segments)
content = self._remove_hallucinations(content)
content = self._add_header(content, download_link)
title = self._generate_title(room, recording_date, recording_time)
@@ -104,6 +106,19 @@ class TranscriptFormatter:
return formatted_output
def _add_header(self, content, download_link: Optional[str]) -> str:
    """Prepend a recording-download notice to the document content.

    When ``download_link`` is falsy (``None`` or an empty string), ``content``
    is returned unchanged. Otherwise a single italic Markdown line (written in
    French) linking to ``download_link`` is placed in front of ``content``.
    """
    if download_link:
        # The notice is wrapped in newlines so it sits on its own line
        # directly above the transcript body.
        notice = (
            f"\n*Télécharger votre enregistrement "
            f"en [suivant ce lien]({download_link})*\n"
        )
        return notice + content
    return content
def _generate_title(
self,
room: Optional[str] = None,