📦️(summary) vendor existing logic for agentic system transition
Removing dead code (the legacy v1 summarization task and its prompt) before introducing the new agent-based summarization architecture, to keep the codebase clean.
This commit is contained in:
committed by
aleb_the_flash
parent
91a8d85db3
commit
0102b428f1
@@ -8,7 +8,6 @@ from fastapi import APIRouter
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from summary.core.celery_worker import (
|
from summary.core.celery_worker import (
|
||||||
process_audio_transcribe_summarize,
|
|
||||||
process_audio_transcribe_summarize_v2,
|
process_audio_transcribe_summarize_v2,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -31,22 +30,17 @@ router = APIRouter(prefix="/tasks")
|
|||||||
@router.post("/")
|
@router.post("/")
|
||||||
async def create_task(request: TaskCreation):
|
async def create_task(request: TaskCreation):
|
||||||
"""Create a task."""
|
"""Create a task."""
|
||||||
if request.version == 1:
|
task = process_audio_transcribe_summarize_v2.apply_async(
|
||||||
task = process_audio_transcribe_summarize.delay(
|
args=[
|
||||||
request.filename, request.email, request.sub
|
request.filename,
|
||||||
)
|
request.email,
|
||||||
else:
|
request.sub,
|
||||||
task = process_audio_transcribe_summarize_v2.apply_async(
|
time.time(),
|
||||||
args=[
|
request.room,
|
||||||
request.filename,
|
request.recording_date,
|
||||||
request.email,
|
request.recording_time,
|
||||||
request.sub,
|
]
|
||||||
time.time(),
|
)
|
||||||
request.room,
|
|
||||||
request.recording_date,
|
|
||||||
request.recording_time,
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
return {"id": task.id, "message": "Task created"}
|
return {"id": task.id, "message": "Task created"}
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,6 @@ from urllib3.util import Retry
|
|||||||
|
|
||||||
from summary.core.analytics import MetadataManager, get_analytics
|
from summary.core.analytics import MetadataManager, get_analytics
|
||||||
from summary.core.config import get_settings
|
from summary.core.config import get_settings
|
||||||
from summary.core.prompt import get_instructions
|
|
||||||
|
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
analytics = get_analytics()
|
analytics = get_analytics()
|
||||||
@@ -156,82 +155,6 @@ def task_failure_handler(task_id, exception=None, **kwargs):
|
|||||||
metadata_manager.capture(task_id, settings.posthog_event_failure)
|
metadata_manager.capture(task_id, settings.posthog_event_failure)
|
||||||
|
|
||||||
|
|
||||||
@celery.task(max_retries=settings.celery_max_retries)
|
|
||||||
def process_audio_transcribe_summarize(filename: str, email: str, sub: str):
|
|
||||||
"""Process an audio file by transcribing it and generating a summary.
|
|
||||||
|
|
||||||
This Celery task performs the following operations:
|
|
||||||
1. Retrieves the audio file from MinIO storage
|
|
||||||
2. Transcribes the audio using OpenAI-compliant API's ASR model
|
|
||||||
3. Generates a summary of the transcription using OpenAI-compliant API's LLM
|
|
||||||
4. Sends the results via webhook
|
|
||||||
"""
|
|
||||||
logger.info("Notification received")
|
|
||||||
logger.debug("filename: %s", filename)
|
|
||||||
|
|
||||||
minio_client = Minio(
|
|
||||||
settings.aws_s3_endpoint_url,
|
|
||||||
access_key=settings.aws_s3_access_key_id,
|
|
||||||
secret_key=settings.aws_s3_secret_access_key,
|
|
||||||
secure=settings.aws_s3_secure_access,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.debug("Connection to the Minio bucket successful")
|
|
||||||
|
|
||||||
audio_file_stream = minio_client.get_object(
|
|
||||||
settings.aws_storage_bucket_name, object_name=filename
|
|
||||||
)
|
|
||||||
|
|
||||||
temp_file_path = save_audio_stream(audio_file_stream)
|
|
||||||
logger.debug("Recording successfully downloaded, filepath: %s", temp_file_path)
|
|
||||||
|
|
||||||
logger.info("Initiating OpenAI client")
|
|
||||||
|
|
||||||
openai_client = openai.OpenAI(
|
|
||||||
api_key=settings.openai_api_key,
|
|
||||||
base_url=settings.openai_base_url,
|
|
||||||
max_retries=settings.openai_max_retries,
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
logger.info("Querying transcription …")
|
|
||||||
with open(temp_file_path, "rb") as audio_file:
|
|
||||||
transcription = openai_client.audio.transcriptions.create(
|
|
||||||
model=settings.openai_asr_model, file=audio_file
|
|
||||||
)
|
|
||||||
transcription = transcription.text
|
|
||||||
|
|
||||||
logger.debug("Transcription: \n %s", transcription)
|
|
||||||
finally:
|
|
||||||
if os.path.exists(temp_file_path):
|
|
||||||
os.remove(temp_file_path)
|
|
||||||
logger.debug("Temporary file removed: %s", temp_file_path)
|
|
||||||
|
|
||||||
instructions = get_instructions(transcription)
|
|
||||||
summary_response = openai_client.chat.completions.create(
|
|
||||||
model=settings.openai_llm_model, messages=instructions
|
|
||||||
)
|
|
||||||
|
|
||||||
summary = summary_response.choices[0].message.content
|
|
||||||
logger.debug("Summary: \n %s", summary)
|
|
||||||
|
|
||||||
# fixme - generate a title using LLM
|
|
||||||
data = {
|
|
||||||
"title": "Votre résumé",
|
|
||||||
"content": summary,
|
|
||||||
"email": email,
|
|
||||||
"sub": sub,
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.debug("Submitting webhook to %s", settings.webhook_url)
|
|
||||||
logger.debug("Request payload: %s", json.dumps(data, indent=2))
|
|
||||||
|
|
||||||
response = post_with_retries(settings.webhook_url, data)
|
|
||||||
|
|
||||||
logger.info("Webhook submitted successfully. Status: %s", response.status_code)
|
|
||||||
logger.debug("Response body: %s", response.text)
|
|
||||||
|
|
||||||
|
|
||||||
@celery.task(
|
@celery.task(
|
||||||
bind=True,
|
bind=True,
|
||||||
autoretry_for=[exceptions.HTTPError],
|
autoretry_for=[exceptions.HTTPError],
|
||||||
|
|||||||
@@ -35,7 +35,6 @@ class Settings(BaseSettings):
|
|||||||
openai_api_key: str
|
openai_api_key: str
|
||||||
openai_base_url: str = "https://api.openai.com/v1"
|
openai_base_url: str = "https://api.openai.com/v1"
|
||||||
openai_asr_model: str = "whisper-1"
|
openai_asr_model: str = "whisper-1"
|
||||||
openai_llm_model: str = "gpt-4o"
|
|
||||||
openai_max_retries: int = 0
|
openai_max_retries: int = 0
|
||||||
|
|
||||||
# Webhook-related settings
|
# Webhook-related settings
|
||||||
|
|||||||
@@ -1,52 +0,0 @@
|
|||||||
# ruff: noqa
|
|
||||||
|
|
||||||
|
|
||||||
def get_instructions(transcript):
|
|
||||||
"""Declare the summarize instructions."""
|
|
||||||
prompt = f"""
|
|
||||||
Audience: Coworkers.
|
|
||||||
|
|
||||||
**Do:**
|
|
||||||
- Detect the language of the transcript and provide your entire response in the same language.
|
|
||||||
- If any part of the transcript is unclear or lacks detail, politely inform the user, specifying which areas need further clarification.
|
|
||||||
- Ensure the accuracy of all information and refrain from adding unverified details.
|
|
||||||
- Format the response using proper markdown and structured sections.
|
|
||||||
- Be concise and avoid repeating yourself between the sections.
|
|
||||||
- Be super precise on nickname
|
|
||||||
- Be a nit-picker
|
|
||||||
- Auto-evaluate your response
|
|
||||||
|
|
||||||
**Don't:**
|
|
||||||
- Write something your are not sure.
|
|
||||||
- Write something that is not mention in the transcript.
|
|
||||||
- Don't make mistake while mentioning someone
|
|
||||||
**Task:**
|
|
||||||
Summarize the provided meeting transcript into clear and well-organized meeting minutes. The summary should be structured into the following sections, excluding irrelevant or inapplicable details:
|
|
||||||
|
|
||||||
1. **Summary**: Write a TL;DR of the meeting.
|
|
||||||
2. **Subjects Discussed**: List the key points or issues in bullet points.
|
|
||||||
4. **Next Steps**: Provide action items as bullet points, assigning each task to a responsible individual and including deadlines (if mentioned). Format action items as tickable checkboxes. Ensure every action is assigned and, if a deadline is provided, that it is clearly stated.
|
|
||||||
|
|
||||||
**Transcript**:
|
|
||||||
{transcript}
|
|
||||||
|
|
||||||
**Response:**
|
|
||||||
|
|
||||||
### Summary [Translate this title based on the transcript’s language]
|
|
||||||
[Provide a brief overview of the key points discussed]
|
|
||||||
|
|
||||||
### Subjects Discussed [Translate this title based on the transcript’s language]
|
|
||||||
- [Summarize each topic concisely]
|
|
||||||
|
|
||||||
### Next Steps [Translate this title based on the transcript’s language]
|
|
||||||
- [ ] Action item [Assign to the responsible individual(s) and include a deadline if applicable, follow this strict format: Action - List of owner(s), deadline.]
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
return [
|
|
||||||
{
|
|
||||||
"role": "system",
|
|
||||||
"content": "You are a concise and structured assistant, that summarizes meeting transcripts.",
|
|
||||||
},
|
|
||||||
{"role": "user", "content": prompt},
|
|
||||||
]
|
|
||||||
Reference in New Issue
Block a user