📦️(summary) vendor existing logic for agentic system transition
Vendoring dead code before introducing new agent-based summarization architecture to maintain clean code.
This commit is contained in:
committed by
aleb_the_flash
parent
91a8d85db3
commit
0102b428f1
@@ -8,7 +8,6 @@ from fastapi import APIRouter
|
||||
from pydantic import BaseModel
|
||||
|
||||
from summary.core.celery_worker import (
|
||||
process_audio_transcribe_summarize,
|
||||
process_audio_transcribe_summarize_v2,
|
||||
)
|
||||
|
||||
@@ -31,22 +30,17 @@ router = APIRouter(prefix="/tasks")
|
||||
@router.post("/")
|
||||
async def create_task(request: TaskCreation):
|
||||
"""Create a task."""
|
||||
if request.version == 1:
|
||||
task = process_audio_transcribe_summarize.delay(
|
||||
request.filename, request.email, request.sub
|
||||
)
|
||||
else:
|
||||
task = process_audio_transcribe_summarize_v2.apply_async(
|
||||
args=[
|
||||
request.filename,
|
||||
request.email,
|
||||
request.sub,
|
||||
time.time(),
|
||||
request.room,
|
||||
request.recording_date,
|
||||
request.recording_time,
|
||||
]
|
||||
)
|
||||
task = process_audio_transcribe_summarize_v2.apply_async(
|
||||
args=[
|
||||
request.filename,
|
||||
request.email,
|
||||
request.sub,
|
||||
time.time(),
|
||||
request.room,
|
||||
request.recording_date,
|
||||
request.recording_time,
|
||||
]
|
||||
)
|
||||
|
||||
return {"id": task.id, "message": "Task created"}
|
||||
|
||||
|
||||
@@ -21,7 +21,6 @@ from urllib3.util import Retry
|
||||
|
||||
from summary.core.analytics import MetadataManager, get_analytics
|
||||
from summary.core.config import get_settings
|
||||
from summary.core.prompt import get_instructions
|
||||
|
||||
settings = get_settings()
|
||||
analytics = get_analytics()
|
||||
@@ -156,82 +155,6 @@ def task_failure_handler(task_id, exception=None, **kwargs):
|
||||
metadata_manager.capture(task_id, settings.posthog_event_failure)
|
||||
|
||||
|
||||
@celery.task(max_retries=settings.celery_max_retries)
def process_audio_transcribe_summarize(filename: str, email: str, sub: str):
    """Transcribe a stored recording, summarize it and post the result.

    This Celery task performs the following operations:
    1. Retrieves the object named ``filename`` from the configured MinIO
       bucket and saves it to a temporary file.
    2. Transcribes the audio using the OpenAI-compliant API's ASR model.
    3. Generates a summary of the transcription using the OpenAI-compliant
       API's LLM, with the messages built by ``get_instructions``.
    4. Sends the title, summary, ``email`` and ``sub`` to
       ``settings.webhook_url``.

    The temporary audio file is removed once the transcription step ends,
    whether it succeeded or raised.
    """
    logger.info("Notification received")
    logger.debug("filename: %s", filename)

    minio_client = Minio(
        settings.aws_s3_endpoint_url,
        access_key=settings.aws_s3_access_key_id,
        secret_key=settings.aws_s3_secret_access_key,
        secure=settings.aws_s3_secure_access,
    )

    logger.debug("Connection to the Minio bucket successful")

    audio_file_stream = minio_client.get_object(
        settings.aws_storage_bucket_name, object_name=filename
    )
    try:
        temp_file_path = save_audio_stream(audio_file_stream)
    finally:
        # get_object returns a live HTTP response; the MinIO SDK expects the
        # caller to close it and hand the connection back to the pool.
        # Both calls are safe even if the helper already closed the stream.
        audio_file_stream.close()
        audio_file_stream.release_conn()

    logger.debug("Recording successfully downloaded, filepath: %s", temp_file_path)

    try:
        logger.info("Initiating OpenAI client")

        # Built inside the try so that a failure here still removes the
        # temporary file in the finally clause below.
        openai_client = openai.OpenAI(
            api_key=settings.openai_api_key,
            base_url=settings.openai_base_url,
            max_retries=settings.openai_max_retries,
        )

        logger.info("Querying transcription …")
        with open(temp_file_path, "rb") as audio_file:
            transcription = openai_client.audio.transcriptions.create(
                model=settings.openai_asr_model, file=audio_file
            )

        # Keep only the text of the transcription response.
        transcription = transcription.text

        logger.debug("Transcription: \n %s", transcription)
    finally:
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
            logger.debug("Temporary file removed: %s", temp_file_path)

    instructions = get_instructions(transcription)
    summary_response = openai_client.chat.completions.create(
        model=settings.openai_llm_model, messages=instructions
    )

    summary = summary_response.choices[0].message.content
    logger.debug("Summary: \n %s", summary)

    # fixme - generate a title using LLM
    data = {
        "title": "Votre résumé",
        "content": summary,
        "email": email,
        "sub": sub,
    }

    logger.debug("Submitting webhook to %s", settings.webhook_url)
    logger.debug("Request payload: %s", json.dumps(data, indent=2))

    response = post_with_retries(settings.webhook_url, data)

    logger.info("Webhook submitted successfully. Status: %s", response.status_code)
    logger.debug("Response body: %s", response.text)
|
||||
|
||||
|
||||
@celery.task(
|
||||
bind=True,
|
||||
autoretry_for=[exceptions.HTTPError],
|
||||
|
||||
@@ -35,7 +35,6 @@ class Settings(BaseSettings):
|
||||
openai_api_key: str
|
||||
openai_base_url: str = "https://api.openai.com/v1"
|
||||
openai_asr_model: str = "whisper-1"
|
||||
openai_llm_model: str = "gpt-4o"
|
||||
openai_max_retries: int = 0
|
||||
|
||||
# Webhook-related settings
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
# ruff: noqa
|
||||
|
||||
|
||||
def get_instructions(transcript: str) -> list:
    """Build the chat messages asking an LLM to summarize a meeting transcript.

    Args:
        transcript: The meeting transcript; it is inserted verbatim into the
            user prompt.

    Returns:
        A two-item list of chat messages: a ``system`` message describing the
        assistant's role, then a ``user`` message carrying the instructions,
        the transcript and the expected response template.
    """
    # The numbered task list mirrors the three sections of the response
    # template at the end of the prompt (Summary, Subjects Discussed,
    # Next Steps), so the numbering runs 1-3 without gaps.
    prompt = f"""
Audience: Coworkers.

**Do:**
- Detect the language of the transcript and provide your entire response in the same language.
- If any part of the transcript is unclear or lacks detail, politely inform the user, specifying which areas need further clarification.
- Ensure the accuracy of all information and refrain from adding unverified details.
- Format the response using proper markdown and structured sections.
- Be concise and avoid repeating yourself between the sections.
- Be super precise on nickname
- Be a nit-picker
- Auto-evaluate your response

**Don't:**
- Write something you are not sure about.
- Write something that is not mentioned in the transcript.
- Make mistakes when mentioning someone.
**Task:**
Summarize the provided meeting transcript into clear and well-organized meeting minutes. The summary should be structured into the following sections, excluding irrelevant or inapplicable details:

1. **Summary**: Write a TL;DR of the meeting.
2. **Subjects Discussed**: List the key points or issues in bullet points.
3. **Next Steps**: Provide action items as bullet points, assigning each task to a responsible individual and including deadlines (if mentioned). Format action items as tickable checkboxes. Ensure every action is assigned and, if a deadline is provided, that it is clearly stated.

**Transcript**:
{transcript}

**Response:**

### Summary [Translate this title based on the transcript’s language]
[Provide a brief overview of the key points discussed]

### Subjects Discussed [Translate this title based on the transcript’s language]
- [Summarize each topic concisely]

### Next Steps [Translate this title based on the transcript’s language]
- [ ] Action item [Assign to the responsible individual(s) and include a deadline if applicable, follow this strict format: Action - List of owner(s), deadline.]

"""

    return [
        {
            "role": "system",
            "content": "You are a concise and structured assistant, that summarizes meeting transcripts.",
        },
        {"role": "user", "content": prompt},
    ]
|
||||
Reference in New Issue
Block a user