📦️(summary) vendor existing logic for agentic system transition

Remove the dead v1 summarization code before introducing the new
agent-based summarization architecture, to keep the codebase clean.
This commit is contained in:
lebaudantoine
2025-09-09 18:17:34 +02:00
committed by aleb_the_flash
parent 91a8d85db3
commit 0102b428f1
4 changed files with 11 additions and 147 deletions

View File

@@ -8,7 +8,6 @@ from fastapi import APIRouter
from pydantic import BaseModel
from summary.core.celery_worker import (
process_audio_transcribe_summarize,
process_audio_transcribe_summarize_v2,
)
@@ -31,22 +30,17 @@ router = APIRouter(prefix="/tasks")
@router.post("/")
async def create_task(request: TaskCreation):
    """Enqueue a transcription-and-summary job for a recording.

    The job is always dispatched to ``process_audio_transcribe_summarize_v2``.
    Each request enqueues exactly one Celery task: the former
    ``request.version`` switch to the legacy task is not kept, because
    running it alongside the unconditional dispatch would enqueue two jobs
    for a single request.

    Args:
        request: Task parameters. The fields read here are ``filename``,
            ``email``, ``sub``, ``room``, ``recording_date`` and
            ``recording_time``.

    Returns:
        A dict holding the Celery task ``id`` and a fixed confirmation
        ``message``.
    """
    task = process_audio_transcribe_summarize_v2.apply_async(
        args=[
            request.filename,
            request.email,
            request.sub,
            # Wall-clock submission time. NOTE(review): presumably used by the
            # worker to measure queueing delay -- confirm against the task.
            time.time(),
            request.room,
            request.recording_date,
            request.recording_time,
        ]
    )
    return {"id": task.id, "message": "Task created"}

View File

@@ -21,7 +21,6 @@ from urllib3.util import Retry
from summary.core.analytics import MetadataManager, get_analytics
from summary.core.config import get_settings
from summary.core.prompt import get_instructions
# Module-level settings object, created once at import time. The tasks in this
# module read their storage, OpenAI and webhook configuration from it.
settings = get_settings()
# Analytics handle, likewise obtained once at import time.
analytics = get_analytics()
@@ -156,82 +155,6 @@ def task_failure_handler(task_id, exception=None, **kwargs):
metadata_manager.capture(task_id, settings.posthog_event_failure)
@celery.task(max_retries=settings.celery_max_retries)
def process_audio_transcribe_summarize(filename: str, email: str, sub: str):
    """Transcribe a stored recording, summarize it, and post the result.

    Steps, in order:
    1. Fetch the object named ``filename`` from the configured MinIO bucket
       and save it to a local file.
    2. Transcribe that file with the ASR model of the OpenAI-compatible API.
    3. Ask the LLM of the same API for a summary of the transcript.
    4. Post the summary, together with ``email`` and ``sub``, to the
       configured webhook.
    """
    logger.info("Notification received")
    logger.debug("filename: %s", filename)

    storage = Minio(
        settings.aws_s3_endpoint_url,
        access_key=settings.aws_s3_access_key_id,
        secret_key=settings.aws_s3_secret_access_key,
        secure=settings.aws_s3_secure_access,
    )
    logger.debug("Connection to the Minio bucket successful")

    recording_stream = storage.get_object(
        settings.aws_storage_bucket_name, object_name=filename
    )
    local_path = save_audio_stream(recording_stream)
    logger.debug("Recording successfully downloaded, filepath: %s", local_path)

    logger.info("Initiating OpenAI client")
    llm_client = openai.OpenAI(
        api_key=settings.openai_api_key,
        base_url=settings.openai_base_url,
        max_retries=settings.openai_max_retries,
    )

    try:
        logger.info("Querying transcription …")
        with open(local_path, "rb") as audio_file:
            asr_result = llm_client.audio.transcriptions.create(
                model=settings.openai_asr_model, file=audio_file
            )
        transcript = asr_result.text
        logger.debug("Transcription: \n %s", transcript)
    finally:
        # The local copy is deleted whether or not transcription succeeded.
        if os.path.exists(local_path):
            os.remove(local_path)
            logger.debug("Temporary file removed: %s", local_path)

    messages = get_instructions(transcript)
    completion = llm_client.chat.completions.create(
        model=settings.openai_llm_model, messages=messages
    )
    summary = completion.choices[0].message.content
    logger.debug("Summary: \n %s", summary)

    # TODO: generate the title with the LLM instead of hard-coding it.
    payload = {
        "title": "Votre résumé",
        "content": summary,
        "email": email,
        "sub": sub,
    }

    logger.debug("Submitting webhook to %s", settings.webhook_url)
    logger.debug("Request payload: %s", json.dumps(payload, indent=2))

    webhook_response = post_with_retries(settings.webhook_url, payload)

    logger.info(
        "Webhook submitted successfully. Status: %s", webhook_response.status_code
    )
    logger.debug("Response body: %s", webhook_response.text)
@celery.task(
bind=True,
autoretry_for=[exceptions.HTTPError],

View File

@@ -35,7 +35,6 @@ class Settings(BaseSettings):
openai_api_key: str
openai_base_url: str = "https://api.openai.com/v1"
openai_asr_model: str = "whisper-1"
openai_llm_model: str = "gpt-4o"
openai_max_retries: int = 0
# Webhook-related settings

View File

@@ -1,52 +0,0 @@
# ruff: noqa
def get_instructions(transcript: str) -> list:
    """Build the chat messages that ask an LLM to summarize a transcript.

    The user prompt lists three sections (Summary, Subjects Discussed,
    Next Steps) and shows a response template with those same three
    headings. The task list is numbered 1-3 so that it matches the
    template; a gap in the numbering would suggest a fourth, undefined
    section to the model.

    Args:
        transcript: Raw meeting transcript. It is inserted verbatim into the
            prompt; braces inside it are not interpreted as placeholders.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dicts: a
        system message followed by the user message carrying the prompt.
    """
    prompt = f"""
Audience: Coworkers.
**Do:**
- Detect the language of the transcript and provide your entire response in the same language.
- If any part of the transcript is unclear or lacks detail, politely inform the user, specifying which areas need further clarification.
- Ensure the accuracy of all information and refrain from adding unverified details.
- Format the response using proper markdown and structured sections.
- Be concise and avoid repeating yourself between the sections.
- Be super precise on nickname
- Be a nit-picker
- Auto-evaluate your response
**Don't:**
- Write something your are not sure.
- Write something that is not mention in the transcript.
- Don't make mistake while mentioning someone
**Task:**
Summarize the provided meeting transcript into clear and well-organized meeting minutes. The summary should be structured into the following sections, excluding irrelevant or inapplicable details:
1. **Summary**: Write a TL;DR of the meeting.
2. **Subjects Discussed**: List the key points or issues in bullet points.
3. **Next Steps**: Provide action items as bullet points, assigning each task to a responsible individual and including deadlines (if mentioned). Format action items as tickable checkboxes. Ensure every action is assigned and, if a deadline is provided, that it is clearly stated.
**Transcript**:
{transcript}
**Response:**
### Summary [Translate this title based on the transcripts language]
[Provide a brief overview of the key points discussed]
### Subjects Discussed [Translate this title based on the transcripts language]
- [Summarize each topic concisely]
### Next Steps [Translate this title based on the transcripts language]
- [ ] Action item [Assign to the responsible individual(s) and include a deadline if applicable, follow this strict format: Action - List of owner(s), deadline.]
"""
    return [
        {
            "role": "system",
            "content": "You are a concise and structured assistant, that summarizes meeting transcripts.",
        },
        {"role": "user", "content": prompt},
    ]