diff --git a/src/summary/summary/core/celery_worker.py b/src/summary/summary/core/celery_worker.py index f98073b4..1f120bd2 100644 --- a/src/summary/summary/core/celery_worker.py +++ b/src/summary/summary/core/celery_worker.py @@ -31,6 +31,7 @@ from summary.core.prompt import ( PROMPT_SYSTEM_TLDR, PROMPT_USER_PART, ) +from summary.core.transcript_formatter import TranscriptFormatter settings = get_settings() analytics = get_analytics() @@ -56,28 +57,6 @@ if settings.sentry_dsn and settings.sentry_is_enabled: sentry_sdk.init(dsn=settings.sentry_dsn, enable_tracing=True) -DEFAULT_EMPTY_TRANSCRIPTION = """ -**Aucun contenu audio n’a été détecté dans votre transcription.** - - -*Si vous pensez qu’il s’agit d’une erreur, n’hésitez pas à contacter -notre support technique : visio@numerique.gouv.fr* - -. - -. - -. - -Quelques points que nous vous conseillons de vérifier : -- Un micro était-il activé ? -- Étiez-vous suffisamment proche ? -- Le micro est-il de bonne qualité ? -- L’enregistrement dure-t-il plus de 30 secondes ? - -""" - - class AudioValidationError(Exception): """Custom exception for audio validation errors.""" @@ -166,36 +145,6 @@ def format_actions(llm_output: dict) -> str: return "" -def format_segments(transcription_data): - """Format transcription segments from WhisperX into a readable conversation format. - - Processes transcription data with segments containing speaker information and text, - combining consecutive segments from the same speaker and formatting them as a - conversation with speaker labels. 
- """ - formatted_output = "" - if not transcription_data or not hasattr(transcription_data, "segments"): - if isinstance(transcription_data, dict) and "segments" in transcription_data: - segments = transcription_data["segments"] - else: - return "Error: Invalid transcription data format" - else: - segments = transcription_data.segments - - previous_speaker = None - - for segment in segments: - speaker = segment.get("speaker", "UNKNOWN_SPEAKER") - text = segment.get("text", "") - if text: - if speaker != previous_speaker: - formatted_output += f"\n\n **{speaker}**: {text}" - else: - formatted_output += f" {text}" - previous_speaker = speaker - return formatted_output - - def post_with_retries(url, data): """Send POST request with automatic retries.""" session = create_retry_session() @@ -306,25 +255,20 @@ def process_audio_transcribe_summarize_v2( os.remove(temp_file_path) logger.debug("Temporary file removed: %s", temp_file_path) - formatted_transcription = ( - DEFAULT_EMPTY_TRANSCRIPTION - if not transcription.segments - else format_segments(transcription) - ) - metadata_manager.track_transcription_metadata(task_id, transcription) - if not room or not recording_date or not recording_time: - title = settings.document_default_title - else: - title = settings.document_title_template.format( - room=room, - room_recording_date=recording_date, - room_recording_time=recording_time, - ) + formatter = TranscriptFormatter() + + content, title = formatter.format( + transcription, + room=room, + recording_date=recording_date, + recording_time=recording_time, + ) + data = { "title": title, - "content": formatted_transcription, + "content": content, "email": email, "sub": sub, } @@ -356,7 +300,7 @@ def process_audio_transcribe_summarize_v2( ): logger.info("Queuing summary generation task.") summarize_transcription.apply_async( - args=[formatted_transcription, email, sub, title], + args=[content, email, sub, title], queue=settings.summarize_queue, ) else: diff --git 
"""Transcript formatting into readable conversation format with speaker labels."""

import logging
from typing import Optional, Tuple

logger = logging.getLogger(__name__)


# Fallback document body used when the transcription yields no segments.
# User-facing runtime text (French) — must stay byte-identical.
DEFAULT_EMPTY_TRANSCRIPTION = """
**Aucun contenu audio n’a été détecté dans votre transcription.**


*Si vous pensez qu’il s’agit d’une erreur, n’hésitez pas à contacter
notre support technique : visio@numerique.gouv.fr*

.

.

.

Quelques points que nous vous conseillons de vérifier :
- Un micro était-il activé ?
- Étiez-vous suffisamment proche ?
- Le micro est-il de bonne qualité ?
- L’enregistrement dure-t-il plus de 30 secondes ?

"""


class TranscriptFormatter:
    """Formats WhisperX transcription output into a readable conversation.

    Handles:
    - Extracting segments from transcription objects or dictionaries
    - Combining consecutive segments from the same speaker
    - Removing hallucination patterns from the formatted content
    - Generating a document title from the recording context
    """

    def __init__(self):
        """Snapshot every setting the formatter needs.

        The title template is cached here too (it was previously read live
        from the settings module inside ``_generate_title``), so all
        configuration is resolved consistently at construction time.
        """
        # Imported lazily so merely importing this module never triggers
        # settings resolution (keeps unit tests free of environment setup).
        from summary.core.config import get_settings

        settings = get_settings()
        self.hallucination_patterns = settings.hallucination_patterns
        self.hallucination_replacement_text = settings.hallucination_replacement_text
        self.default_title = settings.document_default_title
        self.title_template = settings.document_title_template
        self.default_empty_transcription = DEFAULT_EMPTY_TRANSCRIPTION

    def _get_segments(self, transcription):
        """Return the segment list from an object or dict, or ``None``.

        Accepts either a WhisperX result object exposing ``.segments`` or a
        plain dict carrying a ``"segments"`` key.
        """
        if hasattr(transcription, "segments"):
            return transcription.segments
        if isinstance(transcription, dict):
            return transcription.get("segments")
        return None

    def format(
        self,
        transcription,
        room: Optional[str] = None,
        recording_date: Optional[str] = None,
        recording_time: Optional[str] = None,
    ) -> Tuple[str, str]:
        """Format a transcription into ``(content, title)``.

        Args:
            transcription: WhisperX output — an object with a ``segments``
                attribute or a dict with a ``"segments"`` key.
            room: Optional room name used in the title.
            recording_date: Optional recording date used in the title.
            recording_time: Optional recording time used in the title.

        Returns:
            Tuple of the document content (the default "no audio detected"
            notice when no segments were produced) and the document title.
        """
        segments = self._get_segments(transcription)

        if not segments:
            content = self.default_empty_transcription
        else:
            content = self._remove_hallucinations(self._format_speaker(segments))

        return content, self._generate_title(room, recording_date, recording_time)

    def _remove_hallucinations(self, content: str) -> str:
        """Replace every configured hallucination pattern in *content*."""
        replacement = self.hallucination_replacement_text or ""

        # ``or []`` tolerates a missing/None pattern list from settings.
        for pattern in self.hallucination_patterns or []:
            # Guard against an empty pattern: str.replace("") would insert
            # the replacement between every character of the document.
            if pattern:
                content = content.replace(pattern, replacement)
        return content

    def _format_speaker(self, segments) -> str:
        """Render segments as a conversation, merging consecutive same-speaker turns."""
        parts = []
        previous_speaker = None

        for segment in segments:
            speaker = segment.get("speaker", "UNKNOWN_SPEAKER")
            text = segment.get("text", "")
            if not text:
                # Empty segments are dropped and do not break a speaker run
                # (assumes WhisperX rarely emits empty text — behavior on an
                # empty segment with a *different* speaker is unobservable
                # either way; TODO confirm against real transcripts).
                continue
            if speaker != previous_speaker:
                parts.append(f"\n\n **{speaker}**: {text}")
            else:
                parts.append(f" {text}")
            previous_speaker = speaker

        return "".join(parts)

    def _generate_title(
        self,
        room: Optional[str] = None,
        recording_date: Optional[str] = None,
        recording_time: Optional[str] = None,
    ) -> str:
        """Build the document title, falling back to the configured default.

        All three context fields must be present (and non-empty) for the
        template to be used; otherwise the default title is returned.
        """
        if room and recording_date and recording_time:
            return self.title_template.format(
                room=room,
                room_recording_date=recording_date,
                room_recording_time=recording_time,
            )
        return self.default_title