🐛(summary) add audio duration limit to prevent long-running jobs

Set a default 1h30 (5400 seconds) limit on audio duration to prevent
Whisper from running excessively long on large recordings. This improves
resource management and job completion times.
This commit is contained in:
lebaudantoine
2025-07-10 14:58:27 +02:00
committed by aleb_the_flash
parent 30cd6573ef
commit cbabcb877b
2 changed files with 17 additions and 0 deletions

View File

@@ -64,6 +64,12 @@ Quelques points que nous vous conseillons de vérifier :
"""
class AudioValidationError(Exception):
    """Custom exception for audio validation errors.

    Raised when a recording fails a validation check before processing,
    for example when its length exceeds the configured maximum duration.
    The message passed to the constructor describes the failed check.
    """
def save_audio_stream(audio_stream, chunk_size=32 * 1024):
"""Save an audio stream to a temporary OGG file."""
with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tmp:
@@ -263,6 +269,14 @@ def process_audio_transcribe_summarize_v2(
audio_file = File(temp_file_path)
tasks_tracker.track(task_id, {"audio_length": audio_file.info.length})
if audio_file.info.length > settings.recording_max_duration:
error_msg = "Recording too long: %.2fs > %.2fs limit" % (
audio_file.info.length,
settings.recording_max_duration,
)
logger.error(error_msg)
raise AudioValidationError(error_msg)
logger.info("Initiating OpenAI client")
openai_client = openai.OpenAI(
api_key=settings.openai_api_key,

View File

@@ -16,6 +16,9 @@ class Settings(BaseSettings):
app_api_v1_str: str = "/api/v1"
app_api_token: str
# Audio recordings (durations are expressed in seconds)
recording_max_duration: int = 5400 # 1h30
# Celery settings
celery_broker_url: str = "redis://redis/0"
celery_result_backend: str = "redis://redis/0"