(frontend) add subtitle control with transcription display

Kickstart frontend with first draft of subtitle control visible only
to users with appropriate feature flag enabled.

Opens new container at bottom of screen displaying transcription
segments organized by participant. Transcription segment handling was
heavily LLM-generated and will likely need refactoring and review to
simplify and enhance the implementation.

Initial implementation to begin testing subtitle functionality with
real transcription data from LiveKit agents.
This commit is contained in:
lebaudantoine
2025-08-14 19:04:09 +02:00
committed by aleb_the_flash
parent f48dd5cea1
commit 9ff4b23ea7
16 changed files with 406 additions and 5 deletions

View File

@@ -31,6 +31,9 @@ export interface ApiConfig {
expiration_days?: number
max_duration?: number
}
subtitle: {
enabled: boolean
}
telephony: {
enabled: boolean
phone_number?: string

View File

@@ -16,6 +16,12 @@ const avatar = cva({
},
variants: {
context: {
subtitles: {
width: '40px',
height: '40px',
fontSize: '1.3rem',
lineHeight: '1rem',
},
list: {
width: '32px',
height: '32px',

View File

@@ -3,4 +3,5 @@ export enum FeatureFlags {
ScreenRecording = 'screen-recording',
faceLandmarks = 'face-landmarks',
noiseReduction = 'noise-reduction',
subtitles = 'subtitles',
}

View File

@@ -0,0 +1,38 @@
import { useTranslation } from 'react-i18next'
import { RiClosedCaptioningLine } from '@remixicon/react'
import { ToggleButton } from '@/primitives'
import { css } from '@/styled-system/css'
import { useSubtitles } from '@/features/subtitle/hooks/useSubtitles'
import { useAreSubtitlesAvailable } from '@/features/subtitle/hooks/useAreSubtitlesAvailable'
/**
 * Control-bar button that toggles the subtitles panel.
 *
 * Renders nothing when `useAreSubtitlesAvailable` reports the feature as
 * unavailable. The button is disabled while `areSubtitlesPending` is true.
 */
export const SubtitlesToggle = () => {
  const { t } = useTranslation('rooms', { keyPrefix: 'controls.subtitles' })
  const subtitles = useSubtitles()
  const isAvailable = useAreSubtitlesAvailable()

  if (!isAvailable) {
    return null
  }

  // Translation key suffix; also reused in the `data-attr` value below.
  const stateKey = subtitles.areSubtitlesOpen ? 'open' : 'closed'
  const label = t(stateKey)
  const wrapperClass = css({
    position: 'relative',
    display: 'inline-block',
  })

  return (
    <div className={wrapperClass}>
      <ToggleButton
        square
        variant="primaryDark"
        aria-label={label}
        tooltip={label}
        isSelected={subtitles.areSubtitlesOpen}
        isDisabled={subtitles.areSubtitlesPending}
        onPress={subtitles.toggleSubtitles}
        data-attr={`controls-subtitles-${stateKey}`}
      >
        <RiClosedCaptioningLine />
      </ToggleButton>
    </div>
  )
}

View File

@@ -6,6 +6,7 @@ import { Track } from 'livekit-client'
import { ReactionsToggle } from '../../components/controls/ReactionsToggle'
import { HandToggle } from '../../components/controls/HandToggle'
import { ScreenShareToggle } from '../../components/controls/ScreenShareToggle'
import { SubtitlesToggle } from '../../components/controls/SubtitlesToggle'
import { OptionsButton } from '../../components/controls/Options/OptionsButton'
import { StartMediaButton } from '../../components/controls/StartMediaButton'
import { MoreOptions } from './MoreOptions'
@@ -68,6 +69,7 @@ export function DesktopControlBar({
}
/>
)}
<SubtitlesToggle />
<HandToggle />
<OptionsButton />
<LeaveButton />

View File

@@ -34,6 +34,8 @@ import { useConnectionObserver } from '../hooks/useConnectionObserver'
import { useNoiseReduction } from '../hooks/useNoiseReduction'
import { useVideoResolutionSubscription } from '../hooks/useVideoResolutionSubscription'
import { SettingsDialogProvider } from '@/features/settings/components/SettingsDialogProvider'
import { useSubtitles } from '@/features/subtitle/hooks/useSubtitles'
import { Subtitles } from '@/features/subtitle/component/Subtitles'
const LayoutWrapper = styled(
'div',
@@ -42,7 +44,17 @@ const LayoutWrapper = styled(
position: 'relative',
display: 'flex',
width: '100%',
height: '100%',
transition: 'height .5s cubic-bezier(0.4,0,0.2,1) 5ms',
},
variants: {
areSubtitlesOpen: {
true: {
height: 'calc(100% - 12rem)',
},
false: {
height: '100%',
},
},
},
})
)
@@ -158,6 +170,7 @@ export function VideoConference({ ...props }: VideoConferenceProps) {
/* eslint-enable react-hooks/exhaustive-deps */
const { isSidePanelOpen } = useSidePanel()
const { areSubtitlesOpen } = useSubtitles()
const [isShareErrorVisible, setIsShareErrorVisible] = useState(false)
@@ -183,14 +196,19 @@ export function VideoConference({ ...props }: VideoConferenceProps) {
style={{
position: 'absolute',
inset: isSidePanelOpen
? 'var(--lk-grid-gap) calc(358px + 3rem) calc(80px + var(--lk-grid-gap)) 16px'
: 'var(--lk-grid-gap) var(--lk-grid-gap) calc(80px + var(--lk-grid-gap))',
? `var(--lk-grid-gap) calc(358px + 3rem) calc(80px + var(--lk-grid-gap)) 16px`
: `var(--lk-grid-gap) var(--lk-grid-gap) calc(80px + var(--lk-grid-gap))`,
transition: 'inset .5s cubic-bezier(0.4,0,0.2,1) 5ms',
maxHeight: '100%',
}}
>
<LayoutWrapper>
<LayoutWrapper areSubtitlesOpen={areSubtitlesOpen}>
<div
style={{ display: 'flex', position: 'relative', width: '100%' }}
style={{
display: 'flex',
position: 'relative',
width: '100%',
}}
>
{!focusTrack ? (
<div
@@ -221,6 +239,7 @@ export function VideoConference({ ...props }: VideoConferenceProps) {
)}
</div>
</LayoutWrapper>
<Subtitles />
<MainNotificationToast />
</div>
<ControlBar

View File

@@ -0,0 +1,5 @@
import { Participant } from 'livekit-client'
/**
 * Resolves the label to display for a participant.
 *
 * @param participant - Participant whose `name` and `identity` are inspected.
 * @returns The participant's `name` when non-empty, otherwise its `identity`
 * when non-empty, otherwise the literal `'Unknown'`.
 */
export const getParticipantName = (participant: Participant): string => {
  if (participant.name) {
    return participant.name
  }
  if (participant.identity) {
    return participant.identity
  }
  return 'Unknown'
}

View File

@@ -0,0 +1,30 @@
import { useMutation, UseMutationOptions } from '@tanstack/react-query'
import { fetchApi } from '@/api/fetchApi'
import { ApiError } from '@/api/ApiError'
import { ApiRoom } from '@/features/rooms/api/ApiRoom'
/**
 * Arguments of the start-subtitle request.
 */
export interface StartSubtitleParams {
// Interpolated into the request path `rooms/${id}/start-subtitle/`.
id: string
// Sent as `token` in the JSON request body.
token: string
}
/**
 * Issues a POST to `rooms/{id}/start-subtitle/` with the token as JSON body.
 *
 * @param params - Room id used in the path and token sent in the body.
 * @returns The promise produced by `fetchApi`, typed as `ApiRoom`.
 */
const startSubtitle = (params: StartSubtitleParams): Promise<ApiRoom> => {
  const { id, token } = params
  const endpoint = `rooms/${id}/start-subtitle/`
  const payload = JSON.stringify({ token })
  return fetchApi(endpoint, { method: 'POST', body: payload })
}
/**
 * Mutation hook wrapping `startSubtitle`.
 *
 * @param options - Optional mutation options; they are spread after the
 * default `mutationFn`, so any key they define takes precedence.
 * @returns The result of `useMutation` for the start-subtitle request.
 */
export function useStartSubtitle(
  options?: UseMutationOptions<ApiRoom, ApiError, StartSubtitleParams>
) {
  const mutationOptions = { mutationFn: startSubtitle, ...options }
  return useMutation<ApiRoom, ApiError, StartSubtitleParams>(mutationOptions)
}

View File

@@ -0,0 +1,238 @@
import { useEffect, useState } from 'react'
import { useSubtitles } from '../hooks/useSubtitles'
import { css, cva } from '@/styled-system/css'
import { styled } from '@/styled-system/jsx'
import { Avatar } from '@/components/Avatar'
import { Text } from '@/primitives'
import { useRoomContext } from '@livekit/components-react'
import { getParticipantColor } from '@/features/rooms/utils/getParticipantColor'
import { getParticipantName } from '@/features/rooms/utils/getParticipantName'
import { Participant, RoomEvent } from 'livekit-client'
/**
 * Shape of one transcription segment handled by this module.
 *
 * NOTE(review): presumably mirrors the segment payload delivered with
 * LiveKit's `RoomEvent.TranscriptionReceived` — confirm against livekit-client.
 */
export interface TranscriptionSegment {
// Used to de-duplicate and replace segments within a row.
id: string
// Trimmed and joined with single spaces to build the displayed line.
text: string
language: string
startTime: number
endTime: number
// Only updates flagged final replace an already stored segment with the same id.
final: boolean
firstReceivedTime: number
lastReceivedTime: number
}
/**
 * A run of consecutive segments attributed to a single participant,
 * rendered as one subtitle entry.
 */
export interface TranscriptionRow {
// Built as `${participant.identity}-${Date.now()}` when the row is created; used as the React key.
id: string
participant: Participant
segments: TranscriptionSegment[]
// Smallest `startTime` among the segments the row was created with.
startTime: number
// `Date.now()` timestamp of the row's creation or of its latest append.
lastUpdateTime: number
}
/**
 * Folds a batch of segments into the list of rows without mutating the input.
 *
 * The batch is appended to the last row when that row belongs to the same
 * participant identity; otherwise a new row is created. Deriving "who spoke
 * last" from the rows themselves (instead of separate React state) keeps the
 * decision correct when several batches are processed before a re-render.
 *
 * @param rows - Current rows, oldest first.
 * @param segments - Non-empty batch of segments for one participant.
 * @param participant - Participant the batch is attributed to.
 * @param now - Timestamp used for the row id and `lastUpdateTime`.
 * @returns A new array of rows.
 */
const mergeSegmentsIntoRows = (
  rows: TranscriptionRow[],
  segments: TranscriptionSegment[],
  participant: Participant,
  now: number
): TranscriptionRow[] => {
  const lastRow = rows[rows.length - 1]

  if (!lastRow || lastRow.participant.identity !== participant.identity) {
    const newRow: TranscriptionRow = {
      id: `${participant.identity}-${now}`,
      participant,
      segments: [...segments],
      startTime: Math.min(...segments.map((s) => s.startTime)),
      lastUpdateTime: now,
    }
    return [...rows, newRow]
  }

  // First occurrence wins when a batch repeats an id.
  const incomingById = new Map<string, TranscriptionSegment>()
  for (const segment of segments) {
    if (!incomingById.has(segment.id)) incomingById.set(segment.id, segment)
  }

  const existingIds = new Set(lastRow.segments.map((s) => s.id))
  const newSegments = segments.filter((s) => !existingIds.has(s.id))

  // A stored segment is only replaced by an update flagged as final.
  const updatedSegments = lastRow.segments.map((existing) => {
    const update = incomingById.get(existing.id)
    return update && update.final ? update : existing
  })

  return [
    ...rows.slice(0, -1),
    {
      ...lastRow,
      segments: [...updatedSegments, ...newSegments],
      lastUpdateTime: now,
    },
  ]
}

/**
 * Keeps the list of transcription rows displayed by the subtitles panel.
 *
 * @returns The current rows plus callbacks to merge incoming segments, clear
 * all rows, and swap in an updated participant object on matching rows.
 */
const useTranscriptionState = () => {
  const [transcriptionRows, setTranscriptionRows] = useState<
    TranscriptionRow[]
  >([])

  const updateTranscriptions = (
    segments: TranscriptionSegment[],
    participant?: Participant
  ) => {
    if (!participant || segments.length === 0) return
    // Captured outside the updater so the updater stays a pure function.
    const now = Date.now()
    setTranscriptionRows((prevRows) =>
      mergeSegmentsIntoRows(prevRows, segments, participant, now)
    )
  }

  const clearTranscriptions = () => {
    setTranscriptionRows([])
  }

  // The first argument (the new name) is unused: the participant object
  // passed alongside it replaces the one stored on matching rows.
  const updateParticipant = (_name: string, participant: Participant) => {
    setTranscriptionRows((prevRows) =>
      prevRows.map((row) =>
        row.participant.identity === participant.identity
          ? { ...row, participant }
          : row
      )
    )
  }

  return {
    transcriptionRows,
    updateTranscriptions,
    clearTranscriptions,
    updateParticipant,
  }
}
/**
 * Renders one transcription row: the participant's avatar and name, followed
 * by the row's segment texts joined into a single line.
 *
 * Returns `null` when every segment is empty after trimming.
 */
const Transcription = ({ row }: { row: TranscriptionRow }) => {
  const participantColor = getParticipantColor(row.participant)
  const participantName = getParticipantName(row.participant)

  const displayText = row.segments
    .map((segment) => segment.text.trim())
    .filter((text) => text.length > 0)
    .join(' ')

  if (!displayText) {
    return null
  }

  const containerClass = css({
    maxWidth: '800px',
    width: '100%',
  })
  const lineClass = css({
    display: 'flex',
    gap: '0.5rem',
  })
  const contentClass = css({
    color: 'white',
    width: '100%',
  })
  const textClass = css({
    fontSize: '1.5rem',
    lineHeight: '1.7rem',
    fontWeight: '400',
  })

  return (
    <div className={containerClass}>
      <div className={lineClass}>
        <Avatar
          name={participantName}
          bgColor={participantColor}
          context="subtitles"
        />
        <div className={contentClass}>
          <Text variant="h3" margin={false}>
            {participantName}
          </Text>
          <p className={textClass}>{displayText}</p>
        </div>
      </div>
    </div>
  )
}
/**
 * Container of the subtitles panel whose height animates between `0` and
 * `12rem` depending on the `areOpen` variant.
 *
 * NOTE(review): `paddingTop` lives in `base`, so it is also applied while
 * closed — confirm the collapsed wrapper does not still take up space.
 */
const SubtitlesWrapper = styled(
'div',
cva({
base: {
width: '100%',
paddingTop: 'var(--lk-grid-gap)',
transition: 'height .5s cubic-bezier(0.4,0,0.2,1) 5ms',
},
variants: {
areOpen: {
true: {
// Same 12rem that VideoConference's LayoutWrapper subtracts from its height when subtitles are open.
height: '12rem',
},
false: {
height: '0',
},
},
},
})
)
/**
 * Subtitles panel: subscribes to the room's transcription and
 * participant-name events and lists the resulting rows, newest first in DOM
 * order inside a `column-reverse` scroll container.
 */
export const Subtitles = () => {
  const { areSubtitlesOpen } = useSubtitles()
  const room = useRoomContext()
  const { transcriptionRows, updateTranscriptions, updateParticipant } =
    useTranscriptionState()

  // Feed incoming transcription segments into the row state.
  useEffect(() => {
    if (room) {
      room.on(RoomEvent.TranscriptionReceived, updateTranscriptions)
      return () => {
        room.off(RoomEvent.TranscriptionReceived, updateTranscriptions)
      }
    }
  }, [room, updateTranscriptions])

  // Keep the participant stored on existing rows current after a rename.
  useEffect(() => {
    if (room) {
      room.on(RoomEvent.ParticipantNameChanged, updateParticipant)
      return () => {
        room.off(RoomEvent.ParticipantNameChanged, updateParticipant)
      }
    }
  }, [room, updateParticipant])

  const listClass = css({
    height: '100%',
    width: '100%',
    display: 'flex',
    gap: '1.25rem',
    flexDirection: 'column-reverse',
    overflowAnchor: 'auto',
    overflowY: 'scroll',
    padding: '0 1rem',
    alignItems: 'center',
  })

  // Copy before reversing so the state array itself is never mutated.
  const newestFirst = [...transcriptionRows].reverse()

  return (
    <SubtitlesWrapper areOpen={areSubtitlesOpen}>
      <div className={listClass}>
        {newestFirst.map((row) => (
          <Transcription key={row.id} row={row} />
        ))}
      </div>
    </SubtitlesWrapper>
  )
}

View File

@@ -0,0 +1,13 @@
import { useFeatureFlagEnabled } from 'posthog-js/react'
import { FeatureFlags } from '@/features/analytics/enums'
import { useIsAnalyticsEnabled } from '@/features/analytics/hooks/useIsAnalyticsEnabled'
import { useConfig } from '@/api/useConfig'
/**
 * Tells whether the subtitles control should be offered to the user.
 *
 * Subtitles are available when the config enables them and either analytics
 * is disabled or the `subtitles` feature flag is enabled.
 *
 * @returns `true` or `false`; never `undefined`. While the config has not
 * loaded, or when it carries no `subtitle` section, the result is `false`.
 */
export const useAreSubtitlesAvailable = (): boolean => {
  const featureEnabled = useFeatureFlagEnabled(FeatureFlags.subtitles)
  const isAnalyticsEnabled = useIsAnalyticsEnabled()
  const { data } = useConfig()

  // Optional chaining on `subtitle` guards against a config payload that
  // lacks the section instead of throwing during render.
  if (!data?.subtitle?.enabled) return false

  return !isAnalyticsEnabled || Boolean(featureEnabled)
}

View File

@@ -0,0 +1,28 @@
import { useSnapshot } from 'valtio'
import { layoutStore } from '@/stores/layout'
import { useStartSubtitle } from '../api/startSubtitle'
import { useRoomData } from '@/features/rooms/livekit/hooks/useRoomData'
/**
 * Exposes the open/closed state of the subtitles panel and a toggle for it.
 *
 * @returns
 * - `areSubtitlesOpen`: current value of `layoutStore.showSubtitles`.
 * - `toggleSubtitles`: flips the panel. When opening with LiveKit room data
 *   available, it first calls the start-subtitle mutation and only opens the
 *   panel if that call succeeds.
 * - `areSubtitlesPending`: `true` while the start-subtitle mutation runs.
 */
export const useSubtitles = () => {
  const layoutSnap = useSnapshot(layoutStore)
  const apiRoomData = useRoomData()
  const { mutateAsync: startSubtitleRoom, isPending } = useStartSubtitle()

  const toggleSubtitles = async () => {
    // Read the store proxy, not the render-time snapshot: the snapshot may be
    // outdated by the time this handler runs or resumes after the await.
    const shouldOpen = !layoutStore.showSubtitles
    const livekit = apiRoomData?.livekit

    if (shouldOpen && livekit) {
      try {
        await startSubtitleRoom({
          id: livekit.room,
          token: livekit.token,
        })
      } catch (error: unknown) {
        // `mutateAsync` rejects on failure; without this the rejection would
        // escape the press handler unhandled. Keep the panel closed.
        console.error('Failed to start subtitles', error)
        return
      }
    }

    layoutStore.showSubtitles = shouldOpen
  }

  return {
    areSubtitlesOpen: layoutSnap.showSubtitles,
    toggleSubtitles,
    areSubtitlesPending: isPending,
  }
}

View File

@@ -168,6 +168,10 @@
"raise": "Hand heben",
"lower": "Hand senken"
},
"subtitles": {
"closed": "Untertitel anzeigen",
"open": "Untertitel ausblenden"
},
"screenShare": {
"start": "Bildschirm freigeben",
"stop": "Bildschirmfreigabe beenden"

View File

@@ -168,6 +168,10 @@
"raise": "Raise hand",
"lower": "Lower hand"
},
"subtitles": {
"closed": "Show subtitles",
"open": "Hide subtitles"
},
"screenShare": {
"start": "Share screen",
"stop": "Stop screen share"

View File

@@ -168,6 +168,10 @@
"raise": "Lever la main",
"lower": "Baisser la main"
},
"subtitles": {
"closed": "Afficher les sous-titres",
"open": "Masquer les sous-titres"
},
"screenShare": {
"start": "Partager l'écran",
"stop": "Arrêter le partage"

View File

@@ -168,6 +168,10 @@
"raise": "Hand opsteken",
"lower": "Hand laten zakken"
},
"subtitles": {
"closed": "Ondertitels weergeven",
"open": "Ondertitels verbergen"
},
"screenShare": {
"start": "Scherm delen",
"stop": "Stop schermdelen"

View File

@@ -7,6 +7,7 @@ import {
/** Shape of the shared layout state held in `layoutStore`. */
type State = {
showHeader: boolean
showFooter: boolean
// Whether the subtitles panel is open; flipped by `toggleSubtitles` from `useSubtitles`.
showSubtitles: boolean
activePanelId: PanelId | null
activeSubPanelId: SubPanelId | null
}
@@ -14,6 +15,7 @@ type State = {
/**
 * Shared layout state. Every flag starts as `false` and both panel ids as
 * `null`. `useSubtitles` reads it through valtio's `useSnapshot` and writes
 * `showSubtitles` directly on this object.
 */
export const layoutStore = proxy<State>({
showHeader: false,
showFooter: false,
showSubtitles: false,
activePanelId: null,
activeSubPanelId: null,
})