📦️(summary) vendor existing logic for agentic system transition

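Remove dead code (the legacy v1 `process_audio_transcribe_summarize` task, its prompt module, and the now-unused `openai_llm_model` setting) before introducing the new agent-based summarization architecture, so that the transition starts from a clean codebase.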
2025-09-09 18:17:34 +02:00
parent 91a8d85db3
commit 0102b428f1
4 changed files with 11 additions and 147 deletions
--- a/src/summary/summary/api/route/tasks.py
+++ b/src/summary/summary/api/route/tasks.py
@@ -8,7 +8,6 @@ from fastapi import APIRouter
 from pydantic import BaseModel

 from summary.core.celery_worker import (
-    process_audio_transcribe_summarize,
    process_audio_transcribe_summarize_v2,
 )

@@ -31,22 +30,17 @@ router = APIRouter(prefix="/tasks")
@router.post("/")
 async def create_task(request: TaskCreation):
    """Create a task."""
-    if request.version == 1:
-        task = process_audio_transcribe_summarize.delay(
-            request.filename, request.email, request.sub
-        )
-    else:
-        task = process_audio_transcribe_summarize_v2.apply_async(
-            args=[
-                request.filename,
-                request.email,
-                request.sub,
-                time.time(),
-                request.room,
-                request.recording_date,
-                request.recording_time,
-            ]
-        )
+    task = process_audio_transcribe_summarize_v2.apply_async(
+        args=[
+            request.filename,
+            request.email,
+            request.sub,
+            time.time(),
+            request.room,
+            request.recording_date,
+            request.recording_time,
+        ]
+    )

    return {"id": task.id, "message": "Task created"}

--- a/src/summary/summary/core/celery_worker.py
+++ b/src/summary/summary/core/celery_worker.py
@@ -21,7 +21,6 @@ from urllib3.util import Retry

 from summary.core.analytics import MetadataManager, get_analytics
 from summary.core.config import get_settings
-from summary.core.prompt import get_instructions

 settings = get_settings()
 analytics = get_analytics()
@@ -156,82 +155,6 @@ def task_failure_handler(task_id, exception=None, **kwargs):
    metadata_manager.capture(task_id, settings.posthog_event_failure)


-@celery.task(max_retries=settings.celery_max_retries)
-def process_audio_transcribe_summarize(filename: str, email: str, sub: str):
-    """Process an audio file by transcribing it and generating a summary.
-
-    This Celery task performs the following operations:
-    1. Retrieves the audio file from MinIO storage
-    2. Transcribes the audio using OpenAI-compliant API's ASR model
-    3. Generates a summary of the transcription using OpenAI-compliant API's LLM
-    4. Sends the results via webhook
-    """
-    logger.info("Notification received")
-    logger.debug("filename: %s", filename)
-
-    minio_client = Minio(
-        settings.aws_s3_endpoint_url,
-        access_key=settings.aws_s3_access_key_id,
-        secret_key=settings.aws_s3_secret_access_key,
-        secure=settings.aws_s3_secure_access,
-    )
-
-    logger.debug("Connection to the Minio bucket successful")
-
-    audio_file_stream = minio_client.get_object(
-        settings.aws_storage_bucket_name, object_name=filename
-    )
-
-    temp_file_path = save_audio_stream(audio_file_stream)
-    logger.debug("Recording successfully downloaded, filepath: %s", temp_file_path)
-
-    logger.info("Initiating OpenAI client")
-
-    openai_client = openai.OpenAI(
-        api_key=settings.openai_api_key,
-        base_url=settings.openai_base_url,
-        max_retries=settings.openai_max_retries,
-    )
-
-    try:
-        logger.info("Querying transcription …")
-        with open(temp_file_path, "rb") as audio_file:
-            transcription = openai_client.audio.transcriptions.create(
-                model=settings.openai_asr_model, file=audio_file
-            )
-            transcription = transcription.text
-
-            logger.debug("Transcription: \n %s", transcription)
-    finally:
-        if os.path.exists(temp_file_path):
-            os.remove(temp_file_path)
-            logger.debug("Temporary file removed: %s", temp_file_path)
-
-    instructions = get_instructions(transcription)
-    summary_response = openai_client.chat.completions.create(
-        model=settings.openai_llm_model, messages=instructions
-    )
-
-    summary = summary_response.choices[0].message.content
-    logger.debug("Summary: \n %s", summary)
-
-    # fixme - generate a title using LLM
-    data = {
-        "title": "Votre résumé",
-        "content": summary,
-        "email": email,
-        "sub": sub,
-    }
-
-    logger.debug("Submitting webhook to %s", settings.webhook_url)
-    logger.debug("Request payload: %s", json.dumps(data, indent=2))
-
-    response = post_with_retries(settings.webhook_url, data)
-
-    logger.info("Webhook submitted successfully. Status: %s", response.status_code)
-    logger.debug("Response body: %s", response.text)
-
-
@celery.task(
    bind=True,
    autoretry_for=[exceptions.HTTPError],
--- a/src/summary/summary/core/config.py
+++ b/src/summary/summary/core/config.py
@@ -35,7 +35,6 @@ class Settings(BaseSettings):
    openai_api_key: str
    openai_base_url: str = "https://api.openai.com/v1"
    openai_asr_model: str = "whisper-1"
-    openai_llm_model: str = "gpt-4o"
    openai_max_retries: int = 0

    # Webhook-related settings
--- a/src/summary/summary/core/prompt.py
+++ b/src/summary/summary/core/prompt.py
@@ -1,52 +0,0 @@
-# ruff: noqa
-
-
-def get_instructions(transcript):
-    """Declare the summarize instructions."""
-    prompt = f"""
-    Audience: Coworkers.
-
-    **Do:**
-    - Detect the language of the transcript and provide your entire response in the same language.
-    - If any part of the transcript is unclear or lacks detail, politely inform the user, specifying which areas need further clarification.
-    - Ensure the accuracy of all information and refrain from adding unverified details.
-    - Format the response using proper markdown and structured sections.
-    - Be concise and avoid repeating yourself between the sections.
-    - Be super precise on nickname
-    - Be a nit-picker
-    - Auto-evaluate your response
-
-    **Don't:**
-    - Write something your are not sure.
-    - Write something that is not mention in the transcript.
-    - Don't make mistake while mentioning someone
-    **Task:**
-    Summarize the provided meeting transcript into clear and well-organized meeting minutes. The summary should be structured into the following sections, excluding irrelevant or inapplicable details:
-
-    1. **Summary**: Write a  TL;DR of the meeting.
-    2. **Subjects Discussed**: List the key points or issues in bullet points.
-    4. **Next Steps**: Provide action items as bullet points, assigning each task to a responsible individual and including deadlines (if mentioned). Format action items as tickable checkboxes. Ensure every action is assigned and, if a deadline is provided, that it is clearly stated.
-
-    **Transcript**:  
-    {transcript}
-
-    **Response:**
-
-    ### Summary [Translate this title based on the transcript’s language]
-    [Provide a brief overview of the key points discussed]
-
-    ### Subjects Discussed [Translate this title based on the transcript’s language]
-    - [Summarize each topic concisely]
-
-    ### Next Steps [Translate this title based on the transcript’s language]  
-    - [ ] Action item [Assign to the responsible individual(s) and include a deadline if applicable, follow this strict format: Action - List of owner(s), deadline.]
-
-    """
-
-    return [
-        {
-            "role": "system",
-            "content": "You are a concise and structured assistant, that summarizes meeting transcripts.",
-        },
-        {"role": "user", "content": prompt},
-    ]