Add custom audio renderer for iPhone earpiece and only render joined participants (#3249)

* Add custom audio renderer to only render joined participants & add iOS earpiece workaround; fix left/right to match Chromium + Safari (Firefox is swapped); earpiece as setting. Simpler code and documentation: the doc explains what this class actually does and why it is so complicated. Signed-off-by: Timo K <toger5@hotmail.de> Use only one audioContext, remove (non-working) standby fallback * Add tests * Use optional audio context and effect to initiate it + review
2025-05-15 20:46:39 +02:00
parent 86d80630c1
commit b5fe55aef2
17 changed files with 588 additions and 91 deletions
--- a/src/livekit/MatrixAudioRenderer.test.tsx
+++ b/src/livekit/MatrixAudioRenderer.test.tsx
@@ -0,0 +1,104 @@
+/*
+Copyright 2023, 2024 New Vector Ltd.
+
+SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
+Please see LICENSE in the repository root for full details.
+*/
+
+import { afterEach, beforeEach, expect, it, vi } from "vitest";
+import { render } from "@testing-library/react";
+import { type CallMembership } from "matrix-js-sdk/lib/matrixrtc";
+import {
+  getTrackReferenceId,
+  type TrackReference,
+} from "@livekit/components-core";
+import { type RemoteAudioTrack } from "livekit-client";
+import { type ReactNode } from "react";
+import { useTracks } from "@livekit/components-react";
+
+import { testAudioContext } from "../useAudioContext.test";
+import * as MediaDevicesContext from "./MediaDevicesContext";
+import { MatrixAudioRenderer } from "./MatrixAudioRenderer";
+import { mockTrack } from "../utils/test";
+
+export const TestAudioContextConstructor = vi.fn(() => testAudioContext);
+
+beforeEach(() => {
+  vi.stubGlobal("AudioContext", TestAudioContextConstructor);
+});
+
+afterEach(() => {
+  vi.unstubAllGlobals();
+  vi.clearAllMocks();
+});
+
+vi.mock("@livekit/components-react", async (importOriginal) => {
+  return {
+    ...(await importOriginal()),
+    AudioTrack: (props: { trackRef: TrackReference }): ReactNode => {
+      return (
+        <audio data-testid={"audio"}>
+          {getTrackReferenceId(props.trackRef)}
+        </audio>
+      );
+    },
+    useTracks: vi.fn(),
+  };
+});
+
+const tracks = [mockTrack("test:123")];
+vi.mocked(useTracks).mockReturnValue(tracks);
+
+it("should render for member", () => {
+  const { container, queryAllByTestId } = render(
+    <MatrixAudioRenderer
+      members={[{ sender: "test", deviceId: "123" }] as CallMembership[]}
+    />,
+  );
+  expect(container).toBeTruthy();
+  expect(queryAllByTestId("audio")).toHaveLength(1);
+});
+it("should not render without member", () => {
+  const { container, queryAllByTestId } = render(
+    <MatrixAudioRenderer
+      members={[{ sender: "othermember", deviceId: "123" }] as CallMembership[]}
+    />,
+  );
+  expect(container).toBeTruthy();
+  expect(queryAllByTestId("audio")).toHaveLength(0);
+});
+
+it("should not setup audioContext gain and pan if there is no need to.", () => {
+  render(
+    <MatrixAudioRenderer
+      members={[{ sender: "test", deviceId: "123" }] as CallMembership[]}
+    />,
+  );
+  const audioTrack = tracks[0].publication.track! as RemoteAudioTrack;
+
+  expect(audioTrack.setAudioContext).toHaveBeenCalledTimes(1);
+  expect(audioTrack.setAudioContext).toHaveBeenCalledWith(undefined);
+  expect(audioTrack.setWebAudioPlugins).toHaveBeenCalledTimes(1);
+  expect(audioTrack.setWebAudioPlugins).toHaveBeenCalledWith([]);
+
+  expect(testAudioContext.gain.gain.value).toEqual(1);
+  expect(testAudioContext.pan.pan.value).toEqual(0);
+});
+it("should setup audioContext gain and pan", () => {
+  vi.spyOn(MediaDevicesContext, "useEarpieceAudioConfig").mockReturnValue({
+    pan: 1,
+    volume: 0.1,
+  });
+  render(
+    <MatrixAudioRenderer
+      members={[{ sender: "test", deviceId: "123" }] as CallMembership[]}
+    />,
+  );
+
+  const audioTrack = tracks[0].publication.track! as RemoteAudioTrack;
+  expect(audioTrack.setAudioContext).toHaveBeenCalled();
+  expect(audioTrack.setWebAudioPlugins).toHaveBeenCalled();
+
+  expect(testAudioContext.gain.gain.value).toEqual(0.1);
+  expect(testAudioContext.pan.pan.value).toEqual(1);
+});
--- a/src/livekit/MatrixAudioRenderer.tsx
+++ b/src/livekit/MatrixAudioRenderer.tsx
@@ -0,0 +1,212 @@
+/*
+Copyright 2025 New Vector Ltd.
+
+SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
+Please see LICENSE in the repository root for full details.
+*/
+
+import { getTrackReferenceId } from "@livekit/components-core";
+import { type RemoteAudioTrack, Track } from "livekit-client";
+import { useEffect, useMemo, useRef, useState, type ReactNode } from "react";
+import {
+  useTracks,
+  AudioTrack,
+  type AudioTrackProps,
+} from "@livekit/components-react";
+import { type CallMembership } from "matrix-js-sdk/lib/matrixrtc";
+import { logger } from "matrix-js-sdk/lib/logger";
+
+import { useEarpieceAudioConfig } from "./MediaDevicesContext";
+import { useReactiveState } from "../useReactiveState";
+
+export interface MatrixAudioRendererProps {
+  /**
+   * The list of matrixRTC memberships (`CallMembership`) to render audio for.
+   * This list needs to be composed based on the matrixRTC members so that we do not play audio from users
+   * that are not expected to be in the rtc session.
+   */
+  members: CallMembership[];
+  /**
+   * If set to `true`, mutes all audio tracks rendered by the component.
+   * @remarks
+   * The flag is passed to each rendered `AudioTrack`; the server is then expected to stop sending audio track data to the client.
+   */
+  muted?: boolean;
+}
+
+/**
+ * The `MatrixAudioRenderer` component renders the subscribed audio tracks (microphone, screen share audio, unknown source) of remote LiveKit participants.
+ * Only tracks whose participant identity equals `sender:deviceId` of one of the given `members` are rendered; any other remote identity is logged once and skipped.
+ *
+ * It also takes care of the earpiece audio configuration for iOS devices.
+ * This is done by using the WebAudio API to create a stereo pan effect that mimics the earpiece audio.
+ * @example
+ * ```tsx
+ * <LiveKitRoom>
+ *   <MatrixAudioRenderer members={memberships} />
+ * </LiveKitRoom>
+ * ```
+ * @public
+ */
+export function MatrixAudioRenderer({
+  members,
+  muted,
+}: MatrixAudioRendererProps): ReactNode {
+  const validIdentities = useMemo(
+    () =>
+      new Set(members?.map((member) => `${member.sender}:${member.deviceId}`)),
+    [members],
+  );
+
+  const loggedInvalidIdentities = useRef(new Set<string>());
+  /**
+   * Log an invalid livekit track identity.
+   * An invalid identity is one that does not match any of the matrix rtc members.
+   *
+   * @param identity The identity of the track that is invalid
+   * @param validIdentities The set of valid identities
+   */
+  const logInvalid = (identity: string, validIdentities: Set<string>): void => {
+    if (loggedInvalidIdentities.current.has(identity)) return;
+    logger.warn(
+      `Audio track ${identity} has no matching matrix call member`,
+      `current members: ${Array.from(validIdentities.values())}`,
+      `track will not get rendered`,
+    );
+    loggedInvalidIdentities.current.add(identity);
+  };
+
+  const tracks = useTracks(
+    [
+      Track.Source.Microphone,
+      Track.Source.ScreenShareAudio,
+      Track.Source.Unknown,
+    ],
+    {
+      updateOnlyOn: [],
+      onlySubscribed: true,
+    },
+  ).filter((ref) => {
+    const isValid = validIdentities?.has(ref.participant.identity);
+    if (!isValid && !ref.participant.isLocal)
+      logInvalid(ref.participant.identity, validIdentities);
+    return (
+      !ref.participant.isLocal &&
+      ref.publication.kind === Track.Kind.Audio &&
+      isValid
+    );
+  });
+
+  // This component is also (in addition to the "only play audio for connected members" logic above)
+  // responsible for mimicking earpiece audio on iPhones.
+  // The Safari audio devices enumeration does not expose an earpiece audio device.
+  // We alternatively use the audioContext pan node to only use one of the stereo channels.
+
+  // This component does get additionally complicated because of a Safari bug.
+  // (see: https://bugs.webkit.org/show_bug.cgi?id=251532
+  // and the related issues: https://bugs.webkit.org/show_bug.cgi?id=237878
+  // and https://bugs.webkit.org/show_bug.cgi?id=231105)
+  //
+  // AudioContext gets stopped if the webview gets moved into the background.
+  // Once the phone is in standby, audio playback will stop.
+  // So the pan trick only works if the phone is not in standby.
+  // If earpiece mode is not used we do not use audioContext to allow standby playback.
+  // shouldUseAudioContext is set to false if stereoPan === 0 to allow standby bluetooth playback.
+
+  const { pan: stereoPan, volume: volumeFactor } = useEarpieceAudioConfig();
+  const shouldUseAudioContext = stereoPan !== 0;
+
+  // Initialize the potentially used audio context (it is closed again on unmount).
+  const [audioContext, setAudioContext] = useState<AudioContext | undefined>(
+    undefined,
+  );
+  useEffect(() => {
+    const ctx = new AudioContext();
+    setAudioContext(ctx);
+    return (): void => {
+      void ctx.close();
+    };
+  }, []);
+  const audioNodes = useMemo(
+    () => ({
+      gain: audioContext?.createGain(),
+      pan: audioContext?.createStereoPanner(),
+    }),
+    [audioContext],
+  );
+
+  // Simple effects to update the gain and pan node based on the earpiece audio config
+  useEffect(() => {
+    if (audioNodes.pan) audioNodes.pan.pan.value = stereoPan;
+  }, [audioNodes.pan, stereoPan]);
+  useEffect(() => {
+    if (audioNodes.gain) audioNodes.gain.gain.value = volumeFactor;
+  }, [audioNodes.gain, volumeFactor]);
+
+  return (
+    // We add all audio elements into one <div> for the browser developer tool experience/tidiness.
+    <div style={{ display: "none" }}>
+      {tracks.map((trackRef) => (
+        <AudioTrackWithAudioNodes
+          key={getTrackReferenceId(trackRef)}
+          trackRef={trackRef}
+          muted={muted}
+          audioContext={shouldUseAudioContext ? audioContext : undefined}
+          audioNodes={audioNodes}
+        />
+      ))}
+    </div>
+  );
+}
+
+interface StereoPanAudioTrackProps {
+  muted?: boolean;
+  audioContext?: AudioContext;
+  audioNodes: {
+    gain?: GainNode;
+    pan?: StereoPannerNode;
+  };
+}
+
+/**
+ * This wraps `livekit.AudioTrack` to allow adding audio nodes to a track.
+ * Its main purpose is to remount the AudioTrack component when switching from
+ * audioContext to normal audio playback.
+ * As of now the AudioTrack component does not support adding audio nodes while being mounted.
+ * @param param0 The track reference, mute flag, optional audio context and audio nodes; all remaining props are passed on to `AudioTrack`.
+ * @returns The `AudioTrack` element once the audio context and plugins have been applied to the track, `false` until then.
+ */
+function AudioTrackWithAudioNodes({
+  trackRef,
+  muted,
+  audioContext,
+  audioNodes,
+  ...props
+}: StereoPanAudioTrackProps &
+  AudioTrackProps &
+  React.RefAttributes<HTMLAudioElement>): ReactNode {
+  // This is used to unmount/remount the AudioTrack component.
+  // Mounting needs to happen after the audioContext is set.
+  // (adding the audio context when already mounted did not work outside strict mode)
+  const [trackReady, setTrackReady] = useReactiveState(
+    () => false,
+    // We only want the track to reset once both (audioNodes and audioContext) are set.
+    // For unsetting the audioContext it is enough if one of them is undefined.
+    [audioContext && audioNodes],
+  );
+
+  useEffect(() => {
+    if (!trackRef || trackReady) return;
+    const track = trackRef.publication.track as RemoteAudioTrack;
+    const useContext = audioContext && audioNodes.gain && audioNodes.pan;
+    track.setAudioContext(useContext ? audioContext : undefined);
+    track.setWebAudioPlugins(
+      useContext ? [audioNodes.gain!, audioNodes.pan!] : [],
+    );
+    setTrackReady(true);
+  }, [audioContext, audioNodes, setTrackReady, trackReady, trackRef]);
+
+  return (
+    trackReady && <AudioTrack trackRef={trackRef} muted={muted} {...props} />
+  );
+}
--- a/src/livekit/MediaDevicesContext.tsx
+++ b/src/livekit/MediaDevicesContext.tsx
@@ -26,12 +26,16 @@ import {
  audioInput as audioInputSetting,
  audioOutput as audioOutputSetting,
  videoInput as videoInputSetting,
+  alwaysShowIphoneEarpiece as alwaysShowIphoneEarpieceSetting,
  type Setting,
 } from "../settings/settings";

+export const EARPIECE_CONFIG_ID = "earpiece-id";
+
 export type DeviceLabel =
  | { type: "name"; name: string }
  | { type: "number"; number: number }
+  | { type: "earpiece" }
  | { type: "default"; name: string | null };

 export interface MediaDevice {
@@ -40,6 +44,11 @@ export interface MediaDevice {
   */
  available: Map<string, DeviceLabel>;
  selectedId: string | undefined;
+  /**
+   * An additional device configuration that makes us use only one channel of the
+   * output device and a reduced volume.
+   */
+  useAsEarpiece: boolean | undefined;
  /**
   * The group ID of the selected device.
   */
@@ -65,6 +74,7 @@ function useMediaDevice(
 ): MediaDevice {
  // Make sure we don't needlessly reset to a device observer without names,
  // once permissions are already given
+  const [alwaysShowIphoneEarpice] = useSetting(alwaysShowIphoneEarpieceSetting);
  const hasRequestedPermissions = useRef(false);
  const requestPermissions = usingNames || hasRequestedPermissions.current;
  hasRequestedPermissions.current ||= usingNames;
@@ -102,27 +112,39 @@ function useMediaDevice(
            // Create a virtual default audio output for browsers that don't have one.
            // Its device ID must be the empty string because that's what setSinkId
            // recognizes.
+            // We also create this if there are no available devices but the earpiece
+            // option is shown, so that the user can pick the default or the earpiece.
+            const showEarpiece =
+              navigator.userAgent.match("iPhone") || alwaysShowIphoneEarpice;
            if (
              kind === "audiooutput" &&
-              available.size &&
              !available.has("") &&
-              !available.has("default")
+              !available.has("default") &&
+              (available.size || showEarpiece)
            )
              available = new Map([
                ["", { type: "default", name: availableRaw[0]?.label || null }],
                ...available,
              ]);
+            if (kind === "audiooutput" && showEarpiece)
+              // On iPhones we have to create a virtual earpiece device, because
+              // the earpiece is not available as a device ID.
+              available = new Map([
+                ...available,
+                [EARPIECE_CONFIG_ID, { type: "earpiece" }],
+              ]);
            // Note: creating virtual default input devices would be another problem
            // entirely, because requesting a media stream from deviceId "" won't
            // automatically track the default device.
            return available;
          }),
        ),
-      [kind, deviceObserver$],
+      [alwaysShowIphoneEarpice, deviceObserver$, kind],
    ),
  );

-  const [preferredId, select] = useSetting(setting);
+  const [preferredId, setPreferredId] = useSetting(setting);
+  const [asEarpice, setAsEarpiece] = useState(false);
  const selectedId = useMemo(() => {
    if (available.size) {
      // If the preferred device is available, use it. Or if every available
@@ -138,6 +160,7 @@ function useMediaDevice(
    }
    return undefined;
  }, [available, preferredId]);
+
  const selectedGroupId = useObservableEagerState(
    useMemo(
      () =>
@@ -151,14 +174,27 @@ function useMediaDevice(
    ),
  );

+  const select = useCallback(
+    (id: string) => {
+      if (id === EARPIECE_CONFIG_ID) {
+        setAsEarpiece(true);
+      } else {
+        setAsEarpiece(false);
+        setPreferredId(id);
+      }
+    },
+    [setPreferredId],
+  );
+
  return useMemo(
    () => ({
      available,
      selectedId,
+      useAsEarpiece: asEarpice,
      selectedGroupId,
      select,
    }),
-    [available, selectedId, selectedGroupId, select],
+    [available, selectedId, asEarpice, selectedGroupId, select],
  );
 }

@@ -167,6 +203,7 @@ export const deviceStub: MediaDevice = {
  selectedId: undefined,
  selectedGroupId: undefined,
  select: () => {},
+  useAsEarpiece: false,
 };
 export const devicesStub: MediaDevices = {
  audioInput: deviceStub,
@@ -255,3 +292,30 @@ export const useMediaDeviceNames = (
      return context.stopUsingDeviceNames;
    }
  }, [context, enabled]);
+
+/**
+ * A convenience hook to get the audio node configuration for the earpiece.
+ * It will check the `useAsEarpiece` of the `audioOutput` device and return
+ * the appropriate pan and volume values.
+ *
+ * @returns `{ pan: 1, volume: 0.1 }` if the audio output is used as earpiece, `{ pan: 0, volume: 1 }` otherwise.
+ */
+export const useEarpieceAudioConfig = (): {
+  pan: number;
+  volume: number;
+} => {
+  const { audioOutput } = useMediaDevices();
+  // We use only the right speaker (pan = 1) for the earpiece.
+  // This mimics the behavior of the native earpiece speaker (only the top speaker on an iPhone)
+  const pan = useMemo(
+    () => (audioOutput.useAsEarpiece ? 1 : 0),
+    [audioOutput.useAsEarpiece],
+  );
+  // We also lower the volume by a factor of 10 to optimize for the use case where
+  // a user is holding the phone to their ear.
+  const volume = useMemo(
+    () => (audioOutput.useAsEarpiece ? 0.1 : 1),
+    [audioOutput.useAsEarpiece],
+  );
+  return { pan, volume };
+};