Refactor media devices to live outside React as Observables (#3334)

* Refactor media devices to live outside React as Observables

This moves the media devices state out of React to further our transition to a MVVM architecture in which we can more easily model and store complex application state. I have created an AppViewModel to act as the overarching state holder for any future non-React state we end up creating, and the MediaDevices reside within this. We should move more application logic (including the CallViewModel itself) there in the future.

* Address review feedback

* Fixes from ios debugging session: (#3342)

- dont use preferred vs selected concept in controlled media. Its not needed since we dont use the id for actual browser media devices (the id's are not even actual browser media devices)
  - add more logging
  - add more conditions to not accidently set a deviceId that is not a browser deviceId but one provided via controlled.

---------

Co-authored-by: Timo <16718859+toger5@users.noreply.github.com>
This commit is contained in:
Robin
2025-06-20 12:37:25 -04:00
committed by GitHub
parent 5bf7361d01
commit 5e2e94d794
24 changed files with 763 additions and 682 deletions

View File

@@ -17,12 +17,14 @@ import { type ReactNode } from "react";
import { useTracks } from "@livekit/components-react";
import { testAudioContext } from "../useAudioContext.test";
import * as MediaDevicesContext from "./MediaDevicesContext";
import * as MediaDevicesContext from "../MediaDevicesContext";
import { MatrixAudioRenderer } from "./MatrixAudioRenderer";
import { mockTrack } from "../utils/test";
import { mockMediaDevices, mockTrack } from "../utils/test";
export const TestAudioContextConstructor = vi.fn(() => testAudioContext);
const MediaDevicesProvider = MediaDevicesContext.MediaDevicesContext.Provider;
beforeEach(() => {
vi.stubGlobal("AudioContext", TestAudioContextConstructor);
});
@@ -51,9 +53,11 @@ vi.mocked(useTracks).mockReturnValue(tracks);
it("should render for member", () => {
const { container, queryAllByTestId } = render(
<MatrixAudioRenderer
members={[{ sender: "test", deviceId: "123" }] as CallMembership[]}
/>,
<MediaDevicesProvider value={mockMediaDevices({})}>
<MatrixAudioRenderer
members={[{ sender: "test", deviceId: "123" }] as CallMembership[]}
/>
</MediaDevicesProvider>,
);
expect(container).toBeTruthy();
expect(queryAllByTestId("audio")).toHaveLength(1);
@@ -64,7 +68,9 @@ it("should not render without member", () => {
{ sender: "othermember", deviceId: "123" },
] as CallMembership[];
const { container, queryAllByTestId } = render(
<MatrixAudioRenderer members={memberships} />,
<MediaDevicesProvider value={mockMediaDevices({})}>
<MatrixAudioRenderer members={memberships} />
</MediaDevicesProvider>,
);
expect(container).toBeTruthy();
expect(queryAllByTestId("audio")).toHaveLength(0);
@@ -72,9 +78,11 @@ it("should not render without member", () => {
it("should not setup audioContext gain and pan if there is no need to.", () => {
render(
<MatrixAudioRenderer
members={[{ sender: "test", deviceId: "123" }] as CallMembership[]}
/>,
<MediaDevicesProvider value={mockMediaDevices({})}>
<MatrixAudioRenderer
members={[{ sender: "test", deviceId: "123" }] as CallMembership[]}
/>
</MediaDevicesProvider>,
);
const audioTrack = tracks[0].publication.track! as RemoteAudioTrack;
@@ -93,9 +101,11 @@ it("should setup audioContext gain and pan", () => {
volume: 0.1,
});
render(
<MatrixAudioRenderer
members={[{ sender: "test", deviceId: "123" }] as CallMembership[]}
/>,
<MediaDevicesProvider value={mockMediaDevices({})}>
<MatrixAudioRenderer
members={[{ sender: "test", deviceId: "123" }] as CallMembership[]}
/>
</MediaDevicesProvider>,
);
const audioTrack = tracks[0].publication.track! as RemoteAudioTrack;

View File

@@ -16,7 +16,7 @@ import {
import { type CallMembership } from "matrix-js-sdk/lib/matrixrtc";
import { logger as rootLogger } from "matrix-js-sdk/lib/logger";
import { useEarpieceAudioConfig } from "./MediaDevicesContext";
import { useEarpieceAudioConfig } from "../MediaDevicesContext";
import { useReactiveState } from "../useReactiveState";
import * as controls from "../controls";

View File

@@ -1,445 +0,0 @@
/*
Copyright 2023-2025 New Vector Ltd.
SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
Please see LICENSE in the repository root for full details.
*/
import {
type FC,
createContext,
useCallback,
useContext,
useEffect,
useMemo,
useRef,
useState,
type JSX,
} from "react";
import { createMediaDeviceObserver } from "@livekit/components-core";
import { combineLatest, distinctUntilChanged, map, startWith } from "rxjs";
import { useObservable, useObservableEagerState } from "observable-hooks";
import { logger } from "matrix-js-sdk/lib/logger";
import { deepCompare } from "matrix-js-sdk/lib/utils";
import {
useSetting,
audioInput as audioInputSetting,
audioOutput as audioOutputSetting,
videoInput as videoInputSetting,
alwaysShowIphoneEarpiece as alwaysShowIphoneEarpieceSetting,
type Setting,
} from "../settings/settings";
import { outputDevice$, availableOutputDevices$ } from "../controls";
import { useUrlParams } from "../UrlParams";
// This hardcoded id is used in EX ios! It can only be changed in coordination with
// the ios swift team.
export const EARPIECE_CONFIG_ID = "earpiece-id";
export type DeviceLabel =
| { type: "name"; name: string }
| { type: "number"; number: number }
| { type: "earpiece" }
| { type: "default"; name: string | null };
export interface MediaDeviceHandle {
/**
* A map from available device IDs to labels.
*/
available: Map<string, DeviceLabel>;
selectedId: string | undefined;
/**
* An additional device configuration that makes us use only one channel of the
* output device and a reduced volume.
*/
useAsEarpiece: boolean | undefined;
/**
* The group ID of the selected device.
*/
// This is exposed sort of ad-hoc because it's only needed for knowing when to
// restart the tracks of default input devices, and ideally this behavior
// would be encapsulated somehow…
selectedGroupId: string | undefined;
select: (deviceId: string) => void;
}
interface InputDevices {
audioInput: MediaDeviceHandle;
videoInput: MediaDeviceHandle;
startUsingDeviceNames: () => void;
stopUsingDeviceNames: () => void;
usingNames: boolean;
}
export interface MediaDevices extends Omit<InputDevices, "usingNames"> {
audioOutput: MediaDeviceHandle;
}
/**
* An observable that represents if we should display the devices menu for iOS.
* This implies the following
* - hide any input devices (they do not work anyhow on ios)
* - Show a button to show the native output picker instead.
* - Only show the earpiece toggle option if the earpiece is available:
* `availableOutputDevices$.includes((d)=>d.forEarpiece)`
*/
export const iosDeviceMenu$ = alwaysShowIphoneEarpieceSetting.value$.pipe(
map((v) => v || navigator.userAgent.includes("iPhone")),
);
function useSelectedId(
available: Map<string, DeviceLabel>,
preferredId: string | undefined,
): string | undefined {
return useMemo(() => {
if (available.size) {
// If the preferred device is available, use it. Or if every available
// device ID is falsy, the browser is probably just being paranoid about
// fingerprinting and we should still try using the preferred device.
// Worst case it is not available and the browser will gracefully fall
// back to some other device for us when requesting the media stream.
// Otherwise, select the first available device.
return (preferredId !== undefined && available.has(preferredId)) ||
(available.size === 1 && available.has(""))
? preferredId
: available.keys().next().value;
}
return undefined;
}, [available, preferredId]);
}
/**
* Hook to get access to a mediaDevice handle for a kind. This allows to list
* the available devices, read and set the selected device.
* @param kind Audio input, output or video output.
* @param setting The setting this handle's selection should be synced with.
* @param usingNames If the hook should query device names for the associated
* list.
* @returns A handle for the chosen kind.
*/
function useMediaDeviceHandle(
kind: MediaDeviceKind,
setting: Setting<string | undefined>,
usingNames: boolean,
): MediaDeviceHandle {
const hasRequestedPermissions = useRef(false);
const requestPermissions = usingNames || hasRequestedPermissions.current;
// Make sure we don't needlessly reset to a device observer without names,
// once permissions are already given
hasRequestedPermissions.current ||= usingNames;
// We use a bare device observer here rather than one of the fancy device
// selection hooks from @livekit/components-react, because
// useMediaDeviceSelect expects a room or track, which we don't have here, and
// useMediaDevices provides no way to request device names.
// Tragically, the only way to get device names out of LiveKit is to specify a
// kind, which then results in multiple permissions requests.
const deviceObserver$ = useMemo(
() =>
createMediaDeviceObserver(
kind,
() => logger.error("Error creating MediaDeviceObserver"),
requestPermissions,
).pipe(
startWith([]),
// This Observable emits new values whenever the browser fires a
// MediaDevices 'devicechange' event. One would think, innocently, that
// a 'devicechange' event means the devices have changed. But as of the
// time of writing, we are seeing mobile Safari firing spurious
// 'devicechange' events (where no change has actually occurred) when
// we call MediaDevices.getUserMedia. So, filter by deep equality.
distinctUntilChanged<MediaDeviceInfo[]>(deepCompare),
),
[kind, requestPermissions],
);
const available = useObservableEagerState(
useMemo(
() =>
deviceObserver$.pipe(
map((availableRaw) => {
// Sometimes browsers (particularly Firefox) can return multiple device
// entries for the exact same device ID; using a map deduplicates them
let available = new Map<string, DeviceLabel>(
availableRaw.map((d, i) => [
d.deviceId,
d.label
? { type: "name", name: d.label }
: { type: "number", number: i + 1 },
]),
);
// Create a virtual default audio output for browsers that don't have one.
// Its device ID must be the empty string because that's what setSinkId
// recognizes.
// We also create this if we do not have any available devices, so that
// we can use the default or the earpiece.
if (
kind === "audiooutput" &&
!available.has("") &&
!available.has("default") &&
available.size
)
available = new Map([
["", { type: "default", name: availableRaw[0]?.label || null }],
...available,
]);
// Note: creating virtual default input devices would be another problem
// entirely, because requesting a media stream from deviceId "" won't
// automatically track the default device.
return available;
}),
),
[deviceObserver$, kind],
),
);
const [preferredId, select] = useSetting(setting);
const selectedId = useSelectedId(available, preferredId);
const selectedGroupId = useObservableEagerState(
useMemo(
() =>
deviceObserver$.pipe(
map(
(availableRaw) =>
availableRaw.find((d) => d.deviceId === selectedId)?.groupId,
),
),
[deviceObserver$, selectedId],
),
);
return useMemo(
() => ({
available,
selectedId,
useAsEarpiece: false,
selectedGroupId,
select,
}),
[available, selectedId, selectedGroupId, select],
);
}
export const deviceStub: MediaDeviceHandle = {
available: new Map(),
selectedId: undefined,
selectedGroupId: undefined,
select: () => {},
useAsEarpiece: false,
};
export const devicesStub: MediaDevices = {
audioInput: deviceStub,
audioOutput: deviceStub,
videoInput: deviceStub,
startUsingDeviceNames: () => {},
stopUsingDeviceNames: () => {},
};
export const MediaDevicesContext = createContext<MediaDevices>(devicesStub);
function useInputDevices(): InputDevices {
// Counts the number of callers currently using device names.
const [numCallersUsingNames, setNumCallersUsingNames] = useState(0);
const usingNames = numCallersUsingNames > 0;
const audioInput = useMediaDeviceHandle(
"audioinput",
audioInputSetting,
usingNames,
);
const videoInput = useMediaDeviceHandle(
"videoinput",
videoInputSetting,
usingNames,
);
const startUsingDeviceNames = useCallback(
() => setNumCallersUsingNames((n) => n + 1),
[setNumCallersUsingNames],
);
const stopUsingDeviceNames = useCallback(
() => setNumCallersUsingNames((n) => n - 1),
[setNumCallersUsingNames],
);
return {
audioInput,
videoInput,
startUsingDeviceNames,
stopUsingDeviceNames,
usingNames,
};
}
interface Props {
children: JSX.Element;
}
export const MediaDevicesProvider: FC<Props> = ({ children }) => {
const {
audioInput,
videoInput,
startUsingDeviceNames,
stopUsingDeviceNames,
usingNames,
} = useInputDevices();
const { controlledAudioDevices } = useUrlParams();
const webViewAudioOutput = useMediaDeviceHandle(
"audiooutput",
audioOutputSetting,
usingNames,
);
const controlledAudioOutput = useControlledOutput();
const context: MediaDevices = useMemo(
() => ({
audioInput,
audioOutput: controlledAudioDevices
? controlledAudioOutput
: webViewAudioOutput,
videoInput,
startUsingDeviceNames,
stopUsingDeviceNames,
}),
[
audioInput,
controlledAudioDevices,
controlledAudioOutput,
webViewAudioOutput,
videoInput,
startUsingDeviceNames,
stopUsingDeviceNames,
],
);
return (
<MediaDevicesContext.Provider value={context}>
{children}
</MediaDevicesContext.Provider>
);
};
function useControlledOutput(): MediaDeviceHandle {
const { available } = useObservableEagerState(
useObservable(() => {
const outputDeviceData$ = availableOutputDevices$.pipe(
map((devices) => {
const deviceForEarpiece = devices.find((d) => d.forEarpiece);
const deviceMapTuple: [string, DeviceLabel][] = devices.map(
({ id, name, isEarpiece, isSpeaker /*,isExternalHeadset*/ }) => {
let deviceLabel: DeviceLabel = { type: "name", name };
// if (isExternalHeadset) // Do we want this?
if (isEarpiece) deviceLabel = { type: "earpiece" };
if (isSpeaker) deviceLabel = { type: "default", name };
return [id, deviceLabel];
},
);
return {
devicesMap: new Map<string, DeviceLabel>(deviceMapTuple),
deviceForEarpiece,
};
}),
);
return combineLatest(
[outputDeviceData$, iosDeviceMenu$],
({ devicesMap, deviceForEarpiece }, iosShowEarpiece) => {
let available = devicesMap;
if (iosShowEarpiece && !!deviceForEarpiece) {
available = new Map([
...devicesMap.entries(),
[EARPIECE_CONFIG_ID, { type: "earpiece" }],
]);
}
return { available, deviceForEarpiece };
},
);
}),
);
const [preferredId, setPreferredId] = useSetting(audioOutputSetting);
useEffect(() => {
const subscription = outputDevice$.subscribe((id) => {
if (id) setPreferredId(id);
});
return (): void => subscription.unsubscribe();
}, [setPreferredId]);
const selectedId = useSelectedId(available, preferredId);
const [asEarpiece, setAsEarpiece] = useState(false);
useEffect(() => {
// Let the hosting application know which output device has been selected.
// This information is probably only of interest if the earpiece mode has been
// selected - for example, Element X iOS listens to this to determine whether it
// should enable the proximity sensor.
if (selectedId) {
window.controls.onAudioDeviceSelect?.(selectedId);
// Call deprecated method for backwards compatibility.
window.controls.onOutputDeviceSelect?.(selectedId);
}
setAsEarpiece(selectedId === EARPIECE_CONFIG_ID);
}, [selectedId]);
return useMemo(
() => ({
available: available,
selectedId,
selectedGroupId: undefined,
select: setPreferredId,
useAsEarpiece: asEarpiece,
}),
[available, selectedId, setPreferredId, asEarpiece],
);
}
export const useMediaDevices = (): MediaDevices =>
useContext(MediaDevicesContext);
/**
* React hook that requests for the media devices context to be populated with
* real device names while this component is mounted. This is not done by
* default because it may involve requesting additional permissions from the
* user.
*/
export const useMediaDeviceNames = (
context: MediaDevices,
enabled = true,
): void =>
useEffect(() => {
if (enabled) {
context.startUsingDeviceNames();
return context.stopUsingDeviceNames;
}
}, [context, enabled]);
/**
* A convenience hook to get the audio node configuration for the earpiece.
* It will check the `useAsEarpiece` of the `audioOutput` device and return
* the appropriate pan and volume values.
*
* @returns pan and volume values for the earpiece audio node configuration.
*/
export const useEarpieceAudioConfig = (): {
pan: number;
volume: number;
} => {
const { audioOutput } = useMediaDevices();
// We use only the right speaker (pan = 1) for the earpiece.
// This mimics the behavior of the native earpiece speaker (only the top speaker on an iPhone)
const pan = useMemo(
() => (audioOutput.useAsEarpiece ? 1 : 0),
[audioOutput.useAsEarpiece],
);
// We also do lower the volume by a factor of 10 to optimize for the usecase where
// a user is holding the phone to their ear.
const volume = useMemo(
() => (audioOutput.useAsEarpiece ? 0.1 : 1),
[audioOutput.useAsEarpiece],
);
return { pan, volume };
};

View File

@@ -19,12 +19,18 @@ import E2EEWorker from "livekit-client/e2ee-worker?worker";
import { logger } from "matrix-js-sdk/lib/logger";
import { type MatrixRTCSession } from "matrix-js-sdk/lib/matrixrtc";
import { useObservable, useObservableEagerState } from "observable-hooks";
import { map } from "rxjs";
import {
map,
NEVER,
type Observable,
type Subscription,
switchMap,
} from "rxjs";
import { defaultLiveKitOptions } from "./options";
import { type SFUConfig } from "./openIDSFU";
import { type MuteStates } from "../room/MuteStates";
import { type MediaDeviceHandle, useMediaDevices } from "./MediaDevicesContext";
import { useMediaDevices } from "../MediaDevicesContext";
import {
type ECConnectionState,
useECConnectionState,
@@ -39,6 +45,8 @@ import {
import { observeTrackReference$ } from "../state/MediaViewModel";
import { useUrlParams } from "../UrlParams";
import { useInitial } from "../useInitial";
import { getValue } from "../utils/observable";
import { type SelectedDevice } from "../state/MediaDevices";
interface UseLivekitResult {
livekitRoom?: Room;
@@ -56,7 +64,9 @@ export function useLivekit(
const initialMuteStates = useInitial(() => muteStates);
const devices = useMediaDevices();
const initialDevices = useInitial(() => devices);
const initialAudioInputId = useInitial(
() => getValue(devices.audioInput.selected$)?.id,
);
// Store if audio/video are currently updating. If to prohibit unnecessary calls
// to setMicrophoneEnabled/setCameraEnabled
@@ -94,15 +104,20 @@ export function useLivekit(
...defaultLiveKitOptions,
videoCaptureDefaults: {
...defaultLiveKitOptions.videoCaptureDefaults,
deviceId: initialDevices.videoInput.selectedId,
deviceId: getValue(devices.videoInput.selected$)?.id,
processor,
},
audioCaptureDefaults: {
...defaultLiveKitOptions.audioCaptureDefaults,
deviceId: initialDevices.audioInput.selectedId,
deviceId: initialAudioInputId,
},
audioOutput: {
deviceId: initialDevices.audioOutput.selectedId,
// When using controlled audio devices, we don't want to set the
// deviceId here, because it will be set by the native app.
// (also the id does not need to match a browser device id)
deviceId: controlledAudioDevices
? undefined
: getValue(devices.audioOutput.selected$)?.id,
},
e2ee,
};
@@ -157,7 +172,7 @@ export function useLivekit(
);
const connectionState = useECConnectionState(
initialDevices.audioInput.selectedId,
initialAudioInputId,
initialMuteStates.audio.enabled,
room,
sfuConfig,
@@ -312,62 +327,65 @@ export function useLivekit(
) {
const syncDevice = (
kind: MediaDeviceKind,
device: MediaDeviceHandle,
): void => {
const id = device.selectedId;
// Detect if we're trying to use chrome's default device, in which case
// we need to to see if the default device has changed to a different device
// by comparing the group ID of the device we're using against the group ID
// of what the default device is *now*.
// This is special-cased for only audio inputs because we need to dig around
// in the LocalParticipant object for the track object and there's not a nice
// way to do that generically. There is usually no OS-level default video capture
// device anyway, and audio outputs work differently.
if (
id === "default" &&
kind === "audioinput" &&
room.options.audioCaptureDefaults?.deviceId === "default"
) {
const activeMicTrack = Array.from(
room.localParticipant.audioTrackPublications.values(),
).find((d) => d.source === Track.Source.Microphone)?.track;
selected$: Observable<SelectedDevice | undefined>,
): Subscription =>
selected$.subscribe((device) => {
if (
activeMicTrack &&
// only restart if the stream is still running: LiveKit will detect
// when a track stops & restart appropriately, so this is not our job.
// Plus, we need to avoid restarting again if the track is already in
// the process of being restarted.
activeMicTrack.mediaStreamTrack.readyState !== "ended" &&
device.selectedGroupId !==
activeMicTrack.mediaStreamTrack.getSettings().groupId
device !== undefined &&
room.getActiveDevice(kind) !== device.id
) {
// It's different, so restart the track, ie. cause Livekit to do another
// getUserMedia() call with deviceId: default to get the *new* default device.
// Note that room.switchActiveDevice() won't work: Livekit will ignore it because
// the deviceId hasn't changed (was & still is default).
room.localParticipant
.getTrackPublication(Track.Source.Microphone)
?.audioTrack?.restartTrack()
.catch((e) => {
logger.error(`Failed to restart audio device track`, e);
});
}
} else {
if (id !== undefined && room.getActiveDevice(kind) !== id) {
room
.switchActiveDevice(kind, id)
.switchActiveDevice(kind, device.id)
.catch((e) =>
logger.error(`Failed to sync ${kind} device with LiveKit`, e),
);
}
}
};
});
syncDevice("audioinput", devices.audioInput);
syncDevice("audiooutput", devices.audioOutput);
syncDevice("videoinput", devices.videoInput);
const subscriptions = [
syncDevice("audioinput", devices.audioInput.selected$),
syncDevice("audiooutput", devices.audioOutput.selected$),
syncDevice("videoinput", devices.videoInput.selected$),
// Restart the audio input track whenever we detect that the active media
// device has changed to refer to a different hardware device. We do this
// for the sake of Chrome, which provides a "default" device that is meant
// to match the system's default audio input, whatever that may be.
// This is special-cased for only audio inputs because we need to dig around
// in the LocalParticipant object for the track object and there's not a nice
// way to do that generically. There is usually no OS-level default video capture
// device anyway, and audio outputs work differently.
devices.audioInput.selected$
.pipe(switchMap((device) => device?.hardwareDeviceChange$ ?? NEVER))
.subscribe(() => {
const activeMicTrack = Array.from(
room.localParticipant.audioTrackPublications.values(),
).find((d) => d.source === Track.Source.Microphone)?.track;
if (
activeMicTrack &&
// only restart if the stream is still running: LiveKit will detect
// when a track stops & restart appropriately, so this is not our job.
// Plus, we need to avoid restarting again if the track is already in
// the process of being restarted.
activeMicTrack.mediaStreamTrack.readyState !== "ended"
) {
// Restart the track, which will cause Livekit to do another
// getUserMedia() call with deviceId: default to get the *new* default device.
// Note that room.switchActiveDevice() won't work: Livekit will ignore it because
// the deviceId hasn't changed (was & still is default).
room.localParticipant
.getTrackPublication(Track.Source.Microphone)
?.audioTrack?.restartTrack()
.catch((e) => {
logger.error(`Failed to restart audio device track`, e);
});
}
}),
];
return (): void => {
for (const s of subscriptions) s.unsubscribe();
};
}
}, [room, devices, connectionState, controlledAudioDevices]);