📦️(summary) vendor existing logic for agentic system transition

Vendoring dead code before introducing the new agent-based summarization architecture, in order to keep the codebase clean.
2025-09-09 18:17:34 +02:00
parent 91a8d85db3
commit 0102b428f1
4 changed files with 11 additions and 147 deletions
--- a/src/summary/summary/api/route/tasks.py
+++ b/src/summary/summary/api/route/tasks.py
@@ -8,7 +8,6 @@ from fastapi import APIRouter
 from pydantic import BaseModel
 from summary.core.celery_worker import (
    process_audio_transcribe_summarize,
    process_audio_transcribe_summarize_v2,
 )
@@ -31,22 +30,17 @@ router = APIRouter(prefix="/tasks")
@router.post("/")
 async def create_task(request: TaskCreation):
    """Create a task."""
-    if request.version == 1:
+    task = process_audio_transcribe_summarize_v2.apply_async(
-        task = process_audio_transcribe_summarize.delay(
+        args=[
-            request.filename, request.email, request.sub
+            request.filename,
-        )
+            request.email,
-    else:
+            request.sub,
-        task = process_audio_transcribe_summarize_v2.apply_async(
+            time.time(),
-            args=[
+            request.room,
-                request.filename,
+            request.recording_date,
-                request.email,
+            request.recording_time,
-                request.sub,
+        ]
-                time.time(),
+    )
                request.room,
                request.recording_date,
                request.recording_time,
            ]
        )
    return {"id": task.id, "message": "Task created"}
--- a/src/summary/summary/core/celery_worker.py
+++ b/src/summary/summary/core/celery_worker.py
@@ -21,7 +21,6 @@ from urllib3.util import Retry
 from summary.core.analytics import MetadataManager, get_analytics
 from summary.core.config import get_settings
 from summary.core.prompt import get_instructions
 settings = get_settings()
 analytics = get_analytics()
@@ -156,82 +155,6 @@ def task_failure_handler(task_id, exception=None, **kwargs):
    metadata_manager.capture(task_id, settings.posthog_event_failure)
@celery.task(max_retries=settings.celery_max_retries)
def process_audio_transcribe_summarize(filename: str, email: str, sub: str):
    """Transcribe a stored recording, summarize it and post the result.

    Steps, in order:

    1. Fetch the object named ``filename`` from the configured bucket
       through a MinIO client and save it to a temporary file.
    2. Send that file to the ASR model of the OpenAI-compatible API.
    3. Ask the LLM of the same API for a summary of the transcription.
    4. Submit the summary, together with ``email`` and ``sub``, to the
       configured webhook.

    The temporary file is removed whether or not the transcription succeeds.
    """
    logger.info("Notification received")
    logger.debug("filename: %s", filename)

    storage = Minio(
        settings.aws_s3_endpoint_url,
        access_key=settings.aws_s3_access_key_id,
        secret_key=settings.aws_s3_secret_access_key,
        secure=settings.aws_s3_secure_access,
    )
    logger.debug("Connection to the Minio bucket successful")

    recording_stream = storage.get_object(
        settings.aws_storage_bucket_name,
        object_name=filename,
    )
    local_path = save_audio_stream(recording_stream)
    logger.debug("Recording successfully downloaded, filepath: %s", local_path)

    logger.info("Initiating OpenAI client")
    llm_client = openai.OpenAI(
        api_key=settings.openai_api_key,
        base_url=settings.openai_base_url,
        max_retries=settings.openai_max_retries,
    )

    try:
        logger.info("Querying transcription …")
        with open(local_path, "rb") as recording:
            asr_result = llm_client.audio.transcriptions.create(
                model=settings.openai_asr_model,
                file=recording,
            )
            transcript_text = asr_result.text
            logger.debug("Transcription: \n %s", transcript_text)
    finally:
        # Always drop the local copy, even when the ASR request raised.
        if os.path.exists(local_path):
            os.remove(local_path)
            logger.debug("Temporary file removed: %s", local_path)

    completion = llm_client.chat.completions.create(
        model=settings.openai_llm_model,
        messages=get_instructions(transcript_text),
    )
    summary_text = completion.choices[0].message.content
    logger.debug("Summary: \n %s", summary_text)

    # TODO: generate the title with the LLM instead of using a fixed string.
    payload = {
        "title": "Votre résumé",
        "content": summary_text,
        "email": email,
        "sub": sub,
    }

    logger.debug("Submitting webhook to %s", settings.webhook_url)
    logger.debug("Request payload: %s", json.dumps(payload, indent=2))

    webhook_response = post_with_retries(settings.webhook_url, payload)
    logger.info(
        "Webhook submitted successfully. Status: %s", webhook_response.status_code
    )
    logger.debug("Response body: %s", webhook_response.text)
@celery.task(
    bind=True,
    autoretry_for=[exceptions.HTTPError],
--- a/src/summary/summary/core/config.py
+++ b/src/summary/summary/core/config.py
@@ -35,7 +35,6 @@ class Settings(BaseSettings):
    openai_api_key: str
    openai_base_url: str = "https://api.openai.com/v1"
    openai_asr_model: str = "whisper-1"
    openai_llm_model: str = "gpt-4o"
    openai_max_retries: int = 0
    # Webhook-related settings
--- a/src/summary/summary/core/prompt.py
+++ b/src/summary/summary/core/prompt.py
@@ -1,52 +0,0 @@
 # ruff: noqa
 def get_instructions(transcript):
    """Build the chat messages that ask an LLM for meeting minutes.

    ``transcript`` is interpolated into a fixed user prompt. The returned
    list holds a system message followed by that user message, each as a
    dict with ``role`` and ``content`` keys.
    """
    system_message = {
        "role": "system",
        "content": "You are a concise and structured assistant, that summarizes meeting transcripts.",
    }
    # The prompt text is sent to the model verbatim; keep it unchanged.
    user_prompt = f"""
    Audience: Coworkers.
    **Do:**
    - Detect the language of the transcript and provide your entire response in the same language.
    - If any part of the transcript is unclear or lacks detail, politely inform the user, specifying which areas need further clarification.
    - Ensure the accuracy of all information and refrain from adding unverified details.
    - Format the response using proper markdown and structured sections.
    - Be concise and avoid repeating yourself between the sections.
    - Be super precise on nickname
    - Be a nit-picker
    - Auto-evaluate your response
    **Don't:**
    - Write something your are not sure.
    - Write something that is not mention in the transcript.
    - Don't make mistake while mentioning someone
    **Task:**
    Summarize the provided meeting transcript into clear and well-organized meeting minutes. The summary should be structured into the following sections, excluding irrelevant or inapplicable details:
    1. **Summary**: Write a  TL;DR of the meeting.
    2. **Subjects Discussed**: List the key points or issues in bullet points.
    4. **Next Steps**: Provide action items as bullet points, assigning each task to a responsible individual and including deadlines (if mentioned). Format action items as tickable checkboxes. Ensure every action is assigned and, if a deadline is provided, that it is clearly stated.
    **Transcript**:  
    {transcript}
    **Response:**
    ### Summary [Translate this title based on the transcript’s language]
    [Provide a brief overview of the key points discussed]
    ### Subjects Discussed [Translate this title based on the transcript’s language]
    - [Summarize each topic concisely]
    ### Next Steps [Translate this title based on the transcript’s language]  
    - [ ] Action item [Assign to the responsible individual(s) and include a deadline if applicable, follow this strict format: Action - List of owner(s), deadline.]
    """
    user_message = {"role": "user", "content": user_prompt}
    return [system_message, user_message]