From cbabcb877baaefaed8ddb641c6537ee7182606b7 Mon Sep 17 00:00:00 2001 From: lebaudantoine Date: Thu, 10 Jul 2025 14:58:27 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B(summary)=20add=20audio=20duration?= =?UTF-8?q?=20limit=20to=20prevent=20long-running=20jobs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set default 1h30 limit for audio processing to prevent Whisper from running excessively long on large recordings. Improves resource management and job completion times. --- src/summary/summary/core/celery_worker.py | 14 ++++++++++++++ src/summary/summary/core/config.py | 3 +++ 2 files changed, 17 insertions(+) diff --git a/src/summary/summary/core/celery_worker.py b/src/summary/summary/core/celery_worker.py index a34a8350..4a4f8b63 100644 --- a/src/summary/summary/core/celery_worker.py +++ b/src/summary/summary/core/celery_worker.py @@ -64,6 +64,12 @@ Quelques points que nous vous conseillons de vérifier : """ +class AudioValidationError(Exception): + """Custom exception for audio validation errors.""" + + pass + + def save_audio_stream(audio_stream, chunk_size=32 * 1024): """Save an audio stream to a temporary OGG file.""" with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp: @@ -263,6 +269,14 @@ def process_audio_transcribe_summarize_v2( audio_file = File(temp_file_path) tasks_tracker.track(task_id, {"audio_length": audio_file.info.length}) + if audio_file.info.length > settings.recording_max_duration: + error_msg = "Recording too long: %.2fs > %.2fs limit" % ( + audio_file.info.length, + settings.recording_max_duration, + ) + logger.error(error_msg) + raise AudioValidationError(error_msg) + logger.info("Initiating OpenAI client") openai_client = openai.OpenAI( api_key=settings.openai_api_key, diff --git a/src/summary/summary/core/config.py b/src/summary/summary/core/config.py index cf0dddee..f4cd8a0a 100644 --- a/src/summary/summary/core/config.py +++ b/src/summary/summary/core/config.py @@ -16,6 +16,9 @@ class Settings(BaseSettings): app_api_v1_str: str = "/api/v1" app_api_token: str + # Audio recordings + recording_max_duration: int = 5400 # 1h30 + # Celery settings celery_broker_url: str = "redis://redis/0" celery_result_backend: str = "redis://redis/0"