🔊(summary) increase transcription Celery task logging verbosity
Add detailed logging for owner ID, recording metadata, and processing context in transcription tasks to improve debugging capabilities. Logging the ID of the created document was especially important: when troubleshooting problems with the docs API, I can now share the affected, newly created documents with the docs API maintainers.
This commit is contained in:
committed by
aleb_the_flash
parent
6cd54f7e1e
commit
990507e3c7
@@ -233,11 +233,16 @@ def process_audio_transcribe_summarize_v2(
|
||||
3. Sends the results via webhook
|
||||
|
||||
"""
|
||||
logger.info("Notification received")
|
||||
logger.debug("filename: %s", filename)
|
||||
logger.info(
|
||||
"Notification received | Owner: %s | Room: %s",
|
||||
owner_id,
|
||||
room,
|
||||
)
|
||||
|
||||
task_id = self.request.id
|
||||
|
||||
logger.info("Download recording | Filename: %s", filename)
|
||||
|
||||
minio_client = Minio(
|
||||
settings.aws_s3_endpoint_url,
|
||||
access_key=settings.aws_s3_access_key_id,
|
||||
@@ -278,7 +283,9 @@ def process_audio_transcribe_summarize_v2(
|
||||
)
|
||||
|
||||
try:
|
||||
logger.info("Querying transcription …")
|
||||
logger.info(
|
||||
"Querying transcription for %s seconds of audio …", audio_file.info.length
|
||||
)
|
||||
transcription_start_time = time.time()
|
||||
with open(temp_file_path, "rb") as audio_file:
|
||||
transcription = whisperx_client.audio.transcriptions.create(
|
||||
@@ -286,15 +293,13 @@ def process_audio_transcribe_summarize_v2(
|
||||
file=audio_file,
|
||||
language=settings.whisperx_default_language,
|
||||
)
|
||||
|
||||
transcription_time = round(time.time() - transcription_start_time, 2)
|
||||
metadata_manager.track(
|
||||
task_id,
|
||||
{
|
||||
"transcription_time": round(
|
||||
time.time() - transcription_start_time, 2
|
||||
)
|
||||
},
|
||||
{"transcription_time": transcription_time},
|
||||
)
|
||||
logger.info("Transcription received.")
|
||||
logger.info("Transcription received in %s seconds.", transcription_time)
|
||||
logger.debug("Transcription: \n %s", transcription)
|
||||
finally:
|
||||
if os.path.exists(temp_file_path):
|
||||
@@ -329,8 +334,19 @@ def process_audio_transcribe_summarize_v2(
|
||||
|
||||
response = post_with_retries(settings.webhook_url, data)
|
||||
|
||||
logger.info("Webhook submitted successfully. Status: %s", response.status_code)
|
||||
logger.debug("Response body: %s", response.text)
|
||||
try:
|
||||
response_data = response.json()
|
||||
document_id = response_data.get("id", "N/A")
|
||||
except (json.JSONDecodeError, AttributeError):
|
||||
document_id = "Unable to parse response"
|
||||
response_data = response.text
|
||||
|
||||
logger.info(
|
||||
"Webhook success | Document %s submitted (HTTP %s)",
|
||||
document_id,
|
||||
response.status_code,
|
||||
)
|
||||
logger.debug("Full response: %s", response_data)
|
||||
|
||||
metadata_manager.capture(task_id, settings.posthog_event_success)
|
||||
|
||||
@@ -344,7 +360,7 @@ def process_audio_transcribe_summarize_v2(
|
||||
queue=settings.summarize_queue,
|
||||
)
|
||||
else:
|
||||
logger.info("Summary generation not enabled for this user.")
|
||||
logger.info("Summary generation not enabled for this user. Skipping.")
|
||||
|
||||
|
||||
@signals.task_prerun.connect(sender=process_audio_transcribe_summarize_v2)
|
||||
|
||||
Reference in New Issue
Block a user