From 0102b428f1014674c551532e17b97c13ef873850 Mon Sep 17 00:00:00 2001 From: lebaudantoine Date: Tue, 9 Sep 2025 18:17:34 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=A6=EF=B8=8F(summary)=20vendor=20exist?= =?UTF-8?q?ing=20logic=20for=20agentic=20system=20transition?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove dead code (the v1 `process_audio_transcribe_summarize` Celery task, its `prompt.py` instructions module, and the `openai_llm_model` setting) before introducing the new agent-based summarization architecture, to keep the codebase clean. --- src/summary/summary/api/route/tasks.py | 28 ++++----- src/summary/summary/core/celery_worker.py | 77 ----------------------- src/summary/summary/core/config.py | 1 - src/summary/summary/core/prompt.py | 52 --------------- 4 files changed, 11 insertions(+), 147 deletions(-) delete mode 100644 src/summary/summary/core/prompt.py diff --git a/src/summary/summary/api/route/tasks.py b/src/summary/summary/api/route/tasks.py index b6c1d19e..bc9a89c4 100644 --- a/src/summary/summary/api/route/tasks.py +++ b/src/summary/summary/api/route/tasks.py @@ -8,7 +8,6 @@ from fastapi import APIRouter from pydantic import BaseModel from summary.core.celery_worker import ( - process_audio_transcribe_summarize, process_audio_transcribe_summarize_v2, ) @@ -31,22 +30,17 @@ router = APIRouter(prefix="/tasks") @router.post("/") async def create_task(request: TaskCreation): """Create a task.""" - if request.version == 1: - task = process_audio_transcribe_summarize.delay( - request.filename, request.email, request.sub - ) - else: - task = process_audio_transcribe_summarize_v2.apply_async( - args=[ - request.filename, - request.email, - request.sub, - time.time(), - request.room, - request.recording_date, - request.recording_time, - ] - ) + task = process_audio_transcribe_summarize_v2.apply_async( + args=[ + request.filename, + request.email, + request.sub, + time.time(), + request.room, + request.recording_date, + request.recording_time, + ] + ) return {"id": task.id, "message": "Task created"} diff --git 
a/src/summary/summary/core/celery_worker.py b/src/summary/summary/core/celery_worker.py index 187de4b9..0f97fc40 100644 --- a/src/summary/summary/core/celery_worker.py +++ b/src/summary/summary/core/celery_worker.py @@ -21,7 +21,6 @@ from urllib3.util import Retry from summary.core.analytics import MetadataManager, get_analytics from summary.core.config import get_settings -from summary.core.prompt import get_instructions settings = get_settings() analytics = get_analytics() @@ -156,82 +155,6 @@ def task_failure_handler(task_id, exception=None, **kwargs): metadata_manager.capture(task_id, settings.posthog_event_failure) -@celery.task(max_retries=settings.celery_max_retries) -def process_audio_transcribe_summarize(filename: str, email: str, sub: str): - """Process an audio file by transcribing it and generating a summary. - - This Celery task performs the following operations: - 1. Retrieves the audio file from MinIO storage - 2. Transcribes the audio using OpenAI-compliant API's ASR model - 3. Generates a summary of the transcription using OpenAI-compliant API's LLM - 4. 
Sends the results via webhook - """ - logger.info("Notification received") - logger.debug("filename: %s", filename) - - minio_client = Minio( - settings.aws_s3_endpoint_url, - access_key=settings.aws_s3_access_key_id, - secret_key=settings.aws_s3_secret_access_key, - secure=settings.aws_s3_secure_access, - ) - - logger.debug("Connection to the Minio bucket successful") - - audio_file_stream = minio_client.get_object( - settings.aws_storage_bucket_name, object_name=filename - ) - - temp_file_path = save_audio_stream(audio_file_stream) - logger.debug("Recording successfully downloaded, filepath: %s", temp_file_path) - - logger.info("Initiating OpenAI client") - - openai_client = openai.OpenAI( - api_key=settings.openai_api_key, - base_url=settings.openai_base_url, - max_retries=settings.openai_max_retries, - ) - - try: - logger.info("Querying transcription …") - with open(temp_file_path, "rb") as audio_file: - transcription = openai_client.audio.transcriptions.create( - model=settings.openai_asr_model, file=audio_file - ) - transcription = transcription.text - - logger.debug("Transcription: \n %s", transcription) - finally: - if os.path.exists(temp_file_path): - os.remove(temp_file_path) - logger.debug("Temporary file removed: %s", temp_file_path) - - instructions = get_instructions(transcription) - summary_response = openai_client.chat.completions.create( - model=settings.openai_llm_model, messages=instructions - ) - - summary = summary_response.choices[0].message.content - logger.debug("Summary: \n %s", summary) - - # fixme - generate a title using LLM - data = { - "title": "Votre résumé", - "content": summary, - "email": email, - "sub": sub, - } - - logger.debug("Submitting webhook to %s", settings.webhook_url) - logger.debug("Request payload: %s", json.dumps(data, indent=2)) - - response = post_with_retries(settings.webhook_url, data) - - logger.info("Webhook submitted successfully. 
Status: %s", response.status_code) - logger.debug("Response body: %s", response.text) - - @celery.task( bind=True, autoretry_for=[exceptions.HTTPError], diff --git a/src/summary/summary/core/config.py b/src/summary/summary/core/config.py index 6ef1055d..e2b24cc8 100644 --- a/src/summary/summary/core/config.py +++ b/src/summary/summary/core/config.py @@ -35,7 +35,6 @@ class Settings(BaseSettings): openai_api_key: str openai_base_url: str = "https://api.openai.com/v1" openai_asr_model: str = "whisper-1" - openai_llm_model: str = "gpt-4o" openai_max_retries: int = 0 # Webhook-related settings diff --git a/src/summary/summary/core/prompt.py b/src/summary/summary/core/prompt.py deleted file mode 100644 index 463c9ac5..00000000 --- a/src/summary/summary/core/prompt.py +++ /dev/null @@ -1,52 +0,0 @@ -# ruff: noqa - - -def get_instructions(transcript): - """Declare the summarize instructions.""" - prompt = f""" - Audience: Coworkers. - - **Do:** - - Detect the language of the transcript and provide your entire response in the same language. - - If any part of the transcript is unclear or lacks detail, politely inform the user, specifying which areas need further clarification. - - Ensure the accuracy of all information and refrain from adding unverified details. - - Format the response using proper markdown and structured sections. - - Be concise and avoid repeating yourself between the sections. - - Be super precise on nickname - - Be a nit-picker - - Auto-evaluate your response - - **Don't:** - - Write something your are not sure. - - Write something that is not mention in the transcript. - - Don't make mistake while mentioning someone - **Task:** - Summarize the provided meeting transcript into clear and well-organized meeting minutes. The summary should be structured into the following sections, excluding irrelevant or inapplicable details: - - 1. **Summary**: Write a TL;DR of the meeting. - 2. **Subjects Discussed**: List the key points or issues in bullet points. 
- 4. **Next Steps**: Provide action items as bullet points, assigning each task to a responsible individual and including deadlines (if mentioned). Format action items as tickable checkboxes. Ensure every action is assigned and, if a deadline is provided, that it is clearly stated. - - **Transcript**: - {transcript} - - **Response:** - - ### Summary [Translate this title based on the transcript’s language] - [Provide a brief overview of the key points discussed] - - ### Subjects Discussed [Translate this title based on the transcript’s language] - - [Summarize each topic concisely] - - ### Next Steps [Translate this title based on the transcript’s language] - - [ ] Action item [Assign to the responsible individual(s) and include a deadline if applicable, follow this strict format: Action - List of owner(s), deadline.] - - """ - - return [ - { - "role": "system", - "content": "You are a concise and structured assistant, that summarizes meeting transcripts.", - }, - {"role": "user", "content": prompt}, - ]