From 9ff4b23ea719390967edfaa462b01f04a04edb85 Mon Sep 17 00:00:00 2001 From: lebaudantoine Date: Thu, 14 Aug 2025 19:04:09 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8(frontend)=20add=20subtitle=20control?= =?UTF-8?q?=20with=20transcription=20display?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kickstart frontend with first draft of subtitle control visible only to users with appropriate feature flag enabled. Opens new container at bottom of screen displaying transcription segments organized by participant. Transcription segment handling was heavily LLM-generated and will likely need refactoring and review to simplify and enhance the implementation. Initial implementation to begin testing subtitle functionality with real transcription data from LiveKit agents. --- src/frontend/src/api/useConfig.ts | 3 + src/frontend/src/components/Avatar.tsx | 6 + src/frontend/src/features/analytics/enums.ts | 1 + .../components/controls/SubtitlesToggle.tsx | 38 +++ .../prefabs/ControlBar/DesktopControlBar.tsx | 2 + .../rooms/livekit/prefabs/VideoConference.tsx | 29 ++- .../rooms/utils/getParticipantName.ts | 5 + .../features/subtitle/api/startSubtitle.ts | 30 +++ .../features/subtitle/component/Subtitles.tsx | 238 ++++++++++++++++++ .../hooks/useAreSubtitlesAvailable.ts | 13 + .../features/subtitle/hooks/useSubtitles.tsx | 28 +++ src/frontend/src/locales/de/rooms.json | 4 + src/frontend/src/locales/en/rooms.json | 4 + src/frontend/src/locales/fr/rooms.json | 4 + src/frontend/src/locales/nl/rooms.json | 4 + src/frontend/src/stores/layout.ts | 2 + 16 files changed, 406 insertions(+), 5 deletions(-) create mode 100644 src/frontend/src/features/rooms/livekit/components/controls/SubtitlesToggle.tsx create mode 100644 src/frontend/src/features/rooms/utils/getParticipantName.ts create mode 100644 src/frontend/src/features/subtitle/api/startSubtitle.ts create mode 100644 src/frontend/src/features/subtitle/component/Subtitles.tsx create mode 
100644 src/frontend/src/features/subtitle/hooks/useAreSubtitlesAvailable.ts create mode 100644 src/frontend/src/features/subtitle/hooks/useSubtitles.tsx diff --git a/src/frontend/src/api/useConfig.ts b/src/frontend/src/api/useConfig.ts index 3dc4feb5..e64bd02e 100644 --- a/src/frontend/src/api/useConfig.ts +++ b/src/frontend/src/api/useConfig.ts @@ -31,6 +31,9 @@ export interface ApiConfig { expiration_days?: number max_duration?: number } + subtitle: { + enabled: boolean + } telephony: { enabled: boolean phone_number?: string diff --git a/src/frontend/src/components/Avatar.tsx b/src/frontend/src/components/Avatar.tsx index 87a33743..a2f384f0 100644 --- a/src/frontend/src/components/Avatar.tsx +++ b/src/frontend/src/components/Avatar.tsx @@ -16,6 +16,12 @@ const avatar = cva({ }, variants: { context: { + subtitles: { + width: '40px', + height: '40px', + fontSize: '1.3rem', + lineHeight: '1rem', + }, list: { width: '32px', height: '32px', diff --git a/src/frontend/src/features/analytics/enums.ts b/src/frontend/src/features/analytics/enums.ts index 5c28e84f..bf552bca 100644 --- a/src/frontend/src/features/analytics/enums.ts +++ b/src/frontend/src/features/analytics/enums.ts @@ -3,4 +3,5 @@ export enum FeatureFlags { ScreenRecording = 'screen-recording', faceLandmarks = 'face-landmarks', noiseReduction = 'noise-reduction', + subtitles = 'subtitles', } diff --git a/src/frontend/src/features/rooms/livekit/components/controls/SubtitlesToggle.tsx b/src/frontend/src/features/rooms/livekit/components/controls/SubtitlesToggle.tsx new file mode 100644 index 00000000..9101c459 --- /dev/null +++ b/src/frontend/src/features/rooms/livekit/components/controls/SubtitlesToggle.tsx @@ -0,0 +1,38 @@ +import { useTranslation } from 'react-i18next' +import { RiClosedCaptioningLine } from '@remixicon/react' +import { ToggleButton } from '@/primitives' +import { css } from '@/styled-system/css' +import { useSubtitles } from '@/features/subtitle/hooks/useSubtitles' +import { 
useAreSubtitlesAvailable } from '@/features/subtitle/hooks/useAreSubtitlesAvailable' + +export const SubtitlesToggle = () => { + const { t } = useTranslation('rooms', { keyPrefix: 'controls.subtitles' }) + const { areSubtitlesOpen, toggleSubtitles, areSubtitlesPending } = + useSubtitles() + const tooltipLabel = areSubtitlesOpen ? 'open' : 'closed' + const areSubtitlesAvailable = useAreSubtitlesAvailable() + + if (!areSubtitlesAvailable) return null + + return ( +
+ + + +
+ ) +} diff --git a/src/frontend/src/features/rooms/livekit/prefabs/ControlBar/DesktopControlBar.tsx b/src/frontend/src/features/rooms/livekit/prefabs/ControlBar/DesktopControlBar.tsx index 504fafb8..c7b050e2 100644 --- a/src/frontend/src/features/rooms/livekit/prefabs/ControlBar/DesktopControlBar.tsx +++ b/src/frontend/src/features/rooms/livekit/prefabs/ControlBar/DesktopControlBar.tsx @@ -6,6 +6,7 @@ import { Track } from 'livekit-client' import { ReactionsToggle } from '../../components/controls/ReactionsToggle' import { HandToggle } from '../../components/controls/HandToggle' import { ScreenShareToggle } from '../../components/controls/ScreenShareToggle' +import { SubtitlesToggle } from '../../components/controls/SubtitlesToggle' import { OptionsButton } from '../../components/controls/Options/OptionsButton' import { StartMediaButton } from '../../components/controls/StartMediaButton' import { MoreOptions } from './MoreOptions' @@ -68,6 +69,7 @@ export function DesktopControlBar({ } /> )} + diff --git a/src/frontend/src/features/rooms/livekit/prefabs/VideoConference.tsx b/src/frontend/src/features/rooms/livekit/prefabs/VideoConference.tsx index cb3d7134..95efcb6d 100644 --- a/src/frontend/src/features/rooms/livekit/prefabs/VideoConference.tsx +++ b/src/frontend/src/features/rooms/livekit/prefabs/VideoConference.tsx @@ -34,6 +34,8 @@ import { useConnectionObserver } from '../hooks/useConnectionObserver' import { useNoiseReduction } from '../hooks/useNoiseReduction' import { useVideoResolutionSubscription } from '../hooks/useVideoResolutionSubscription' import { SettingsDialogProvider } from '@/features/settings/components/SettingsDialogProvider' +import { useSubtitles } from '@/features/subtitle/hooks/useSubtitles' +import { Subtitles } from '@/features/subtitle/component/Subtitles' const LayoutWrapper = styled( 'div', @@ -42,7 +44,17 @@ const LayoutWrapper = styled( position: 'relative', display: 'flex', width: '100%', - height: '100%', + transition: 'height 
.5s cubic-bezier(0.4,0,0.2,1) 5ms', + }, + variants: { + areSubtitlesOpen: { + true: { + height: 'calc(100% - 12rem)', + }, + false: { + height: '100%', + }, + }, }, }) ) @@ -158,6 +170,7 @@ export function VideoConference({ ...props }: VideoConferenceProps) { /* eslint-enable react-hooks/exhaustive-deps */ const { isSidePanelOpen } = useSidePanel() + const { areSubtitlesOpen } = useSubtitles() const [isShareErrorVisible, setIsShareErrorVisible] = useState(false) @@ -183,14 +196,19 @@ export function VideoConference({ ...props }: VideoConferenceProps) { style={{ position: 'absolute', inset: isSidePanelOpen - ? 'var(--lk-grid-gap) calc(358px + 3rem) calc(80px + var(--lk-grid-gap)) 16px' - : 'var(--lk-grid-gap) var(--lk-grid-gap) calc(80px + var(--lk-grid-gap))', + ? `var(--lk-grid-gap) calc(358px + 3rem) calc(80px + var(--lk-grid-gap)) 16px` + : `var(--lk-grid-gap) var(--lk-grid-gap) calc(80px + var(--lk-grid-gap))`, transition: 'inset .5s cubic-bezier(0.4,0,0.2,1) 5ms', + maxHeight: '100%', }} > - +
{!focusTrack ? (
+
{ + return participant.name || participant.identity || 'Unknown' +} diff --git a/src/frontend/src/features/subtitle/api/startSubtitle.ts b/src/frontend/src/features/subtitle/api/startSubtitle.ts new file mode 100644 index 00000000..806af4dd --- /dev/null +++ b/src/frontend/src/features/subtitle/api/startSubtitle.ts @@ -0,0 +1,30 @@ +import { useMutation, UseMutationOptions } from '@tanstack/react-query' +import { fetchApi } from '@/api/fetchApi' +import { ApiError } from '@/api/ApiError' +import { ApiRoom } from '@/features/rooms/api/ApiRoom' + +export interface StartSubtitleParams { + id: string + token: string +} + +const startSubtitle = ({ + id, + token, +}: StartSubtitleParams): Promise => { + return fetchApi(`rooms/${id}/start-subtitle/`, { + method: 'POST', + body: JSON.stringify({ + token, + }), + }) +} + +export function useStartSubtitle( + options?: UseMutationOptions +) { + return useMutation({ + mutationFn: startSubtitle, + ...options, + }) +} diff --git a/src/frontend/src/features/subtitle/component/Subtitles.tsx b/src/frontend/src/features/subtitle/component/Subtitles.tsx new file mode 100644 index 00000000..6783f48a --- /dev/null +++ b/src/frontend/src/features/subtitle/component/Subtitles.tsx @@ -0,0 +1,238 @@ +import { useEffect, useState } from 'react' +import { useSubtitles } from '../hooks/useSubtitles' +import { css, cva } from '@/styled-system/css' +import { styled } from '@/styled-system/jsx' +import { Avatar } from '@/components/Avatar' +import { Text } from '@/primitives' +import { useRoomContext } from '@livekit/components-react' +import { getParticipantColor } from '@/features/rooms/utils/getParticipantColor' +import { getParticipantName } from '@/features/rooms/utils/getParticipantName' +import { Participant, RoomEvent } from 'livekit-client' + +export interface TranscriptionSegment { + id: string + text: string + language: string + startTime: number + endTime: number + final: boolean + firstReceivedTime: number + lastReceivedTime: number 
+} + +export interface TranscriptionRow { + id: string + participant: Participant + segments: TranscriptionSegment[] + startTime: number + lastUpdateTime: number +} + +const useTranscriptionState = () => { + const [transcriptionRows, setTranscriptionRows] = useState< + TranscriptionRow[] + >([]) + const [lastActiveParticipantIdentity, setLastActiveParticipantIdentity] = + useState(null) + + const updateTranscriptions = ( + segments: TranscriptionSegment[], + participant?: Participant + ) => { + if (!participant || segments.length === 0) return + + setTranscriptionRows((prevRows) => { + const updatedRows = [...prevRows] + const now = Date.now() + + const shouldAppendToLastRow = + lastActiveParticipantIdentity === participant.identity && + updatedRows.length > 0 + + if (shouldAppendToLastRow) { + const lastRowIndex = updatedRows.length - 1 + const lastRow = updatedRows[lastRowIndex] + + const existingSegmentIds = new Set(lastRow.segments.map((s) => s.id)) + const newSegments = segments.filter( + (segment) => !existingSegmentIds.has(segment.id) + ) + const updatedSegments = lastRow.segments.map((existing) => { + const update = segments.find((s) => s.id === existing.id) + return update && update.final ? 
update : existing + }) + + updatedRows[lastRowIndex] = { + ...lastRow, + segments: [...updatedSegments, ...newSegments], + lastUpdateTime: now, + } + } else { + const newRow: TranscriptionRow = { + id: `${participant.identity}-${now}`, + participant, + segments: [...segments], + startTime: Math.min(...segments.map((s) => s.startTime)), + lastUpdateTime: now, + } + updatedRows.push(newRow) + } + + return updatedRows + }) + + setLastActiveParticipantIdentity(participant.identity) + } + + const clearTranscriptions = () => { + setTranscriptionRows([]) + setLastActiveParticipantIdentity(null) + } + + const updateParticipant = (_name: string, participant: Participant) => { + setTranscriptionRows((prevRows) => { + return prevRows.map((row) => { + if (row.participant.identity === participant.identity) { + return { + ...row, + participant, + } + } + return row + }) + }) + } + + return { + transcriptionRows, + updateTranscriptions, + clearTranscriptions, + updateParticipant, + } +} + +const Transcription = ({ row }: { row: TranscriptionRow }) => { + const participantColor = getParticipantColor(row.participant) + const participantName = getParticipantName(row.participant) + + const getDisplayText = (row: TranscriptionRow): string => { + return row.segments + .filter((segment) => segment.text.trim()) + .map((segment) => segment.text.trim()) + .join(' ') + } + + const displayText = getDisplayText(row) + + if (!displayText) return null + + return ( +
+
+ +
+ + {participantName} + +

+ {displayText} +

+
+
+
+ ) +} + +const SubtitlesWrapper = styled( + 'div', + cva({ + base: { + width: '100%', + paddingTop: 'var(--lk-grid-gap)', + transition: 'height .5s cubic-bezier(0.4,0,0.2,1) 5ms', + }, + variants: { + areOpen: { + true: { + height: '12rem', + }, + false: { + height: '0', + }, + }, + }, + }) +) + +export const Subtitles = () => { + const { areSubtitlesOpen } = useSubtitles() + const room = useRoomContext() + const { transcriptionRows, updateTranscriptions, updateParticipant } = + useTranscriptionState() + + useEffect(() => { + if (!room) return + room.on(RoomEvent.TranscriptionReceived, updateTranscriptions) + return () => { + room.off(RoomEvent.TranscriptionReceived, updateTranscriptions) + } + }, [room, updateTranscriptions]) + + useEffect(() => { + if (!room) return + room.on(RoomEvent.ParticipantNameChanged, updateParticipant) + return () => { + room.off(RoomEvent.ParticipantNameChanged, updateParticipant) + } + }, [room, updateParticipant]) + + return ( + +
+ {transcriptionRows + .slice() + .reverse() + .map((row) => ( + + ))} +
+
+ ) +} diff --git a/src/frontend/src/features/subtitle/hooks/useAreSubtitlesAvailable.ts b/src/frontend/src/features/subtitle/hooks/useAreSubtitlesAvailable.ts new file mode 100644 index 00000000..ff689f7f --- /dev/null +++ b/src/frontend/src/features/subtitle/hooks/useAreSubtitlesAvailable.ts @@ -0,0 +1,13 @@ +import { useFeatureFlagEnabled } from 'posthog-js/react' +import { FeatureFlags } from '@/features/analytics/enums' +import { useIsAnalyticsEnabled } from '@/features/analytics/hooks/useIsAnalyticsEnabled' +import { useConfig } from '@/api/useConfig' + +export const useAreSubtitlesAvailable = () => { + const featureEnabled = useFeatureFlagEnabled(FeatureFlags.subtitles) + const isAnalyticsEnabled = useIsAnalyticsEnabled() + + const { data } = useConfig() + + return data?.subtitle.enabled && (!isAnalyticsEnabled || featureEnabled) +} diff --git a/src/frontend/src/features/subtitle/hooks/useSubtitles.tsx b/src/frontend/src/features/subtitle/hooks/useSubtitles.tsx new file mode 100644 index 00000000..b93eea47 --- /dev/null +++ b/src/frontend/src/features/subtitle/hooks/useSubtitles.tsx @@ -0,0 +1,28 @@ +import { useSnapshot } from 'valtio' +import { layoutStore } from '@/stores/layout' +import { useStartSubtitle } from '../api/startSubtitle' +import { useRoomData } from '@/features/rooms/livekit/hooks/useRoomData' + +export const useSubtitles = () => { + const layoutSnap = useSnapshot(layoutStore) + + const apiRoomData = useRoomData() + const { mutateAsync: startSubtitleRoom, isPending } = useStartSubtitle() + + const toggleSubtitles = async () => { + if (!layoutSnap.showSubtitles && apiRoomData?.livekit) { + await startSubtitleRoom({ + id: apiRoomData?.livekit?.room, + token: apiRoomData?.livekit?.token, + }) + } + + layoutStore.showSubtitles = !layoutSnap.showSubtitles + } + + return { + areSubtitlesOpen: layoutSnap.showSubtitles, + toggleSubtitles, + areSubtitlesPending: isPending, + } +} diff --git a/src/frontend/src/locales/de/rooms.json 
b/src/frontend/src/locales/de/rooms.json index b3f61b5d..ff51e8b4 100644 --- a/src/frontend/src/locales/de/rooms.json +++ b/src/frontend/src/locales/de/rooms.json @@ -168,6 +168,10 @@ "raise": "Hand heben", "lower": "Hand senken" }, + "subtitles": { + "closed": "Untertitel anzeigen", + "open": "Untertitel ausblenden" + }, "screenShare": { "start": "Bildschirm freigeben", "stop": "Bildschirmfreigabe beenden" diff --git a/src/frontend/src/locales/en/rooms.json b/src/frontend/src/locales/en/rooms.json index 853a7570..42acea30 100644 --- a/src/frontend/src/locales/en/rooms.json +++ b/src/frontend/src/locales/en/rooms.json @@ -168,6 +168,10 @@ "raise": "Raise hand", "lower": "Lower hand" }, + "subtitles": { + "closed": "Show subtitles", + "open": "Hide subtitles" + }, "screenShare": { "start": "Share screen", "stop": "Stop screen share" diff --git a/src/frontend/src/locales/fr/rooms.json b/src/frontend/src/locales/fr/rooms.json index 5104557b..fcd4d0da 100644 --- a/src/frontend/src/locales/fr/rooms.json +++ b/src/frontend/src/locales/fr/rooms.json @@ -168,6 +168,10 @@ "raise": "Lever la main", "lower": "Baisser la main" }, + "subtitles": { + "closed": "Afficher les sous-titres", + "open": "Masquer les sous-titres" + }, "screenShare": { "start": "Partager l'écran", "stop": "Arrêter le partage" diff --git a/src/frontend/src/locales/nl/rooms.json b/src/frontend/src/locales/nl/rooms.json index 5846341b..f42680a0 100644 --- a/src/frontend/src/locales/nl/rooms.json +++ b/src/frontend/src/locales/nl/rooms.json @@ -168,6 +168,10 @@ "raise": "Hand opsteken", "lower": "Hand laten zakken" }, + "subtitles": { + "closed": "Ondertitels weergeven", + "open": "Ondertitels verbergen" + }, "screenShare": { "start": "Scherm delen", "stop": "Stop schermdelen" diff --git a/src/frontend/src/stores/layout.ts b/src/frontend/src/stores/layout.ts index f9e53b0c..b04a24b6 100644 --- a/src/frontend/src/stores/layout.ts +++ b/src/frontend/src/stores/layout.ts @@ -7,6 +7,7 @@ import { type State = 
{ showHeader: boolean showFooter: boolean + showSubtitles: boolean activePanelId: PanelId | null activeSubPanelId: SubPanelId | null } @@ -14,6 +15,7 @@ type State = { export const layoutStore = proxy({ showHeader: false, showFooter: false, + showSubtitles: false, activePanelId: null, activeSubPanelId: null, })