From cd9b80b96617919760a4a3ff11bdf3ecbff62e4f Mon Sep 17 00:00:00 2001 From: Arnaud Robin Date: Sun, 30 Mar 2025 03:33:11 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8(frontend)=20add=20face=20tracking?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement face landmark processor to be able to track face live. --- .../components/blur/FaceLandmarksProcessor.ts | 251 ++++++++++++++++++ .../rooms/livekit/components/blur/index.ts | 9 +- .../effects/EffectsConfiguration.tsx | 45 +++- src/frontend/src/locales/de/rooms.json | 29 +- src/frontend/src/locales/en/rooms.json | 5 + src/frontend/src/locales/fr/rooms.json | 19 +- src/frontend/src/locales/nl/rooms.json | 29 +- 7 files changed, 356 insertions(+), 31 deletions(-) create mode 100644 src/frontend/src/features/rooms/livekit/components/blur/FaceLandmarksProcessor.ts diff --git a/src/frontend/src/features/rooms/livekit/components/blur/FaceLandmarksProcessor.ts b/src/frontend/src/features/rooms/livekit/components/blur/FaceLandmarksProcessor.ts new file mode 100644 index 00000000..da368f70 --- /dev/null +++ b/src/frontend/src/features/rooms/livekit/components/blur/FaceLandmarksProcessor.ts @@ -0,0 +1,251 @@ +import { ProcessorOptions, Track } from 'livekit-client' +import posthog from 'posthog-js' +import { + FilesetResolver, + FaceLandmarker, + FaceLandmarkerResult, +} from '@mediapipe/tasks-vision' +import { + CLEAR_TIMEOUT, + SET_TIMEOUT, + TIMEOUT_TICK, + timerWorkerScript, +} from './TimerWorker' +import { + BackgroundProcessorInterface, + BackgroundOptions, + ProcessorType, +} from '.' 
+
+// Size of the canvas on which frames are analysed and rendered.
+const PROCESSING_WIDTH = 256 * 3
+const PROCESSING_HEIGHT = 144 * 3
+
+// Delay requested from the timer worker between two processed frames (~30 fps).
+const FRAME_INTERVAL_MS = 1000 / 30
+
+const FACE_LANDMARKS_CANVAS_ID = 'face-landmarks-local'
+
+/**
+ * Track processor that runs MediaPipe's FaceLandmarker on the source video
+ * and draws every detected landmark on top of the frame.
+ *
+ * Frames are rendered into a PROCESSING_WIDTH x PROCESSING_HEIGHT canvas whose
+ * capture stream is exposed as `processedTrack`. The processing loop is paced
+ * by a timer worker: each TIMEOUT_TICK processes one frame and then requests
+ * the next tick.
+ */
+export class FaceLandmarksProcessor implements BackgroundProcessorInterface {
+  options: BackgroundOptions
+  name: string
+  processedTrack?: MediaStreamTrack | undefined
+
+  source?: MediaStreamTrack
+  sourceSettings?: MediaTrackSettings
+  videoElement?: HTMLVideoElement
+  videoElementLoaded?: boolean
+
+  // Canvas containing the video processing result
+  outputCanvas?: HTMLCanvasElement
+  outputCanvasCtx?: CanvasRenderingContext2D
+
+  faceLandmarker?: FaceLandmarker
+  faceLandmarkerResult?: FaceLandmarkerResult
+
+  // The resized image of the video source
+  sourceImageData?: ImageData
+
+  timerWorker?: Worker
+
+  type: ProcessorType
+
+  constructor(opts: BackgroundOptions) {
+    this.name = 'face_landmarks'
+    this.options = opts
+    this.type = ProcessorType.FACE_LANDMARKS
+  }
+
+  /** Always reports support; no capability detection is performed. */
+  static get isSupported() {
+    return true // Face landmarks should work in all modern browsers
+  }
+
+  /**
+   * Wires the processor to the source track/element, exposes the canvas
+   * capture stream as `processedTrack`, loads the landmark model and starts
+   * the processing loop.
+   *
+   * @param opts - Processor options; `opts.element` must be the video element
+   *   rendering the source track.
+   * @throws Error when `opts.element` is missing, when no 2D context can be
+   *   obtained, or when the canvas stream exposes no video track.
+   */
+  async init(opts: ProcessorOptions<Track.Kind>) {
+    if (!opts.element) {
+      throw new Error('Element is required for processing')
+    }
+
+    const source = opts.track as MediaStreamTrack
+    this.source = source
+    this.sourceSettings = source.getSettings()
+    this.videoElement = opts.element as HTMLVideoElement
+
+    this._createMainCanvas()
+
+    const stream = this.outputCanvas!.captureStream()
+    const tracks = stream.getVideoTracks()
+    if (tracks.length === 0) {
+      throw new Error('No tracks found for processing')
+    }
+    this.processedTrack = tracks[0]
+
+    // The model must be ready before the first tick can reach process().
+    await this.initFaceLandmarker()
+    this._initWorker()
+
+    posthog.capture('face-landmarks-init')
+  }
+
+  /**
+   * Creates the timer worker and schedules the first frame as soon as the
+   * video element has data to draw.
+   */
+  _initWorker() {
+    const worker = new Worker(timerWorkerScript, {
+      name: 'FaceLandmarks',
+    })
+    this.timerWorker = worker
+    worker.onmessage = (data) => this.onTimerMessage(data)
+
+    const video = this.videoElement!
+    // `loadeddata` fires only once per load: if the element already holds a
+    // frame when we get here the event will never come, so check readyState
+    // as well instead of waiting forever.
+    const hasFrame =
+      this.videoElementLoaded ||
+      video.readyState >= HTMLMediaElement.HAVE_CURRENT_DATA
+    if (hasFrame) {
+      this.videoElementLoaded = true
+      this._scheduleNextFrame()
+    } else {
+      video.onloadeddata = () => {
+        this.videoElementLoaded = true
+        this._scheduleNextFrame()
+      }
+    }
+  }
+
+  /** Asks the timer worker for the next tick; no-op once destroyed. */
+  _scheduleNextFrame() {
+    this.timerWorker?.postMessage({
+      id: SET_TIMEOUT,
+      timeMs: FRAME_INTERVAL_MS,
+    })
+  }
+
+  /** Handles timer worker messages: a TIMEOUT_TICK processes one frame. */
+  onTimerMessage(response: { data: { id: number } }) {
+    if (response.data.id === TIMEOUT_TICK) {
+      this.process()
+    }
+  }
+
+  /**
+   * Loads the MediaPipe vision runtime and creates the FaceLandmarker in
+   * VIDEO running mode.
+   */
+  async initFaceLandmarker() {
+    // NOTE(review): both URLs are unpinned (no package version, `latest`
+    // model), so the fetched assets can drift from the installed
+    // @mediapipe/tasks-vision package — consider pinning or self-hosting.
+    const vision = await FilesetResolver.forVisionTasks(
+      'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision/wasm'
+    )
+    this.faceLandmarker = await FaceLandmarker.createFromOptions(vision, {
+      baseOptions: {
+        modelAssetPath:
+          'https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task',
+        delegate: 'GPU',
+      },
+      runningMode: 'VIDEO',
+      // NOTE(review): blendshapes and transformation matrices are requested
+      // but nothing in this class reads them.
+      outputFaceBlendshapes: true,
+      outputFacialTransformationMatrixes: true,
+    })
+  }
+
+  /**
+   * Draws the current video frame, scaled to the processing size, on the
+   * output canvas and stores its pixels in `sourceImageData`.
+   */
+  async sizeSource() {
+    this.outputCanvasCtx?.drawImage(
+      this.videoElement!,
+      0,
+      0,
+      this.videoElement!.videoWidth,
+      this.videoElement!.videoHeight,
+      0,
+      0,
+      PROCESSING_WIDTH,
+      PROCESSING_HEIGHT
+    )
+
+    this.sourceImageData = this.outputCanvasCtx?.getImageData(
+      0,
+      0,
+      PROCESSING_WIDTH,
+      PROCESSING_HEIGHT
+    )
+  }
+
+  /**
+   * Runs landmark detection on `sourceImageData` and stores the result in
+   * `faceLandmarkerResult`. Does nothing when the model or the frame is not
+   * available (e.g. after destroy()).
+   */
+  async detectFaces() {
+    if (!this.faceLandmarker || !this.sourceImageData) {
+      return
+    }
+    const startTimeMs = performance.now()
+    this.faceLandmarkerResult = this.faceLandmarker.detectForVideo(
+      this.sourceImageData,
+      startTimeMs
+    )
+  }
+
+  /**
+   * Redraws the video frame on the output canvas and strokes a small green
+   * circle on every landmark of every detected face.
+   */
+  async drawFaceLandmarks() {
+    const ctx = this.outputCanvasCtx!
+
+    // Draw the original video frame at the canvas size
+    ctx.drawImage(
+      this.videoElement!,
+      0,
+      0,
+      this.videoElement!.videoWidth,
+      this.videoElement!.videoHeight,
+      0,
+      0,
+      PROCESSING_WIDTH,
+      PROCESSING_HEIGHT
+    )
+
+    if (!this.faceLandmarkerResult?.faceLandmarks) {
+      return
+    }
+
+    // Draw face landmarks
+    ctx.strokeStyle = '#00FF00'
+    ctx.lineWidth = 2
+
+    for (const face of this.faceLandmarkerResult.faceLandmarks) {
+      for (const landmark of face) {
+        // Landmark coordinates are scaled by the canvas dimensions
+        const x = landmark.x * PROCESSING_WIDTH
+        const y = landmark.y * PROCESSING_HEIGHT
+
+        ctx.beginPath()
+        ctx.arc(x, y, 2, 0, 2 * Math.PI)
+        ctx.stroke()
+      }
+    }
+  }
+
+  /**
+   * Processes one frame (resize, detect, draw) and schedules the next one.
+   * A failing frame is logged and skipped so that a single error does not
+   * stop the loop for good.
+   */
+  async process() {
+    // A tick can still be delivered after destroy(); do not touch the closed
+    // model and do not re-arm the timer in that case.
+    if (!this.timerWorker || !this.faceLandmarker) {
+      return
+    }
+
+    try {
+      await this.sizeSource()
+      await this.detectFaces()
+      await this.drawFaceLandmarks()
+    } catch (error) {
+      console.error('FaceLandmarksProcessor: failed to process frame', error)
+    } finally {
+      this._scheduleNextFrame()
+    }
+  }
+
+  /**
+   * Binds `outputCanvas` to the canvas with the expected id found in the
+   * document, or to a newly created one, and grabs its 2D context.
+   *
+   * @throws Error when no 2D context can be obtained from the canvas.
+   */
+  _createMainCanvas() {
+    this.outputCanvas =
+      document.querySelector<HTMLCanvasElement>(
+        `#${FACE_LANDMARKS_CANVAS_ID}`
+      ) ??
+      this._createCanvas(
+        FACE_LANDMARKS_CANVAS_ID,
+        PROCESSING_WIDTH,
+        PROCESSING_HEIGHT
+      )
+
+    const ctx = this.outputCanvas.getContext('2d')
+    if (!ctx) {
+      throw new Error('Unable to get a 2D context for face landmarks canvas')
+    }
+    this.outputCanvasCtx = ctx
+  }
+
+  /** Creates a canvas element (not attached to the document). */
+  _createCanvas(id: string, width: number, height: number) {
+    const element = document.createElement('canvas')
+    element.setAttribute('id', id)
+    element.setAttribute('width', '' + width)
+    element.setAttribute('height', '' + height)
+    return element
+  }
+
+  /** Replaces the stored options. */
+  update(opts: BackgroundOptions): void {
+    this.options = opts
+  }
+
+  /** Tears the processor down and initialises it again with `opts`. */
+  async restart(opts: ProcessorOptions<Track.Kind>) {
+    await this.destroy()
+    return this.init(opts)
+  }
+
+  /**
+   * Stops the processing loop and releases the worker and the model.
+   * Safe to call more than once.
+   */
+  async destroy() {
+    // Prevent a late `loadeddata` from scheduling work on a dead worker.
+    if (this.videoElement) {
+      this.videoElement.onloadeddata = null
+    }
+
+    this.timerWorker?.postMessage({
+      id: CLEAR_TIMEOUT,
+    })
+    this.timerWorker?.terminate()
+    this.timerWorker = undefined
+
+    this.faceLandmarker?.close()
+    this.faceLandmarker = undefined
+    this.faceLandmarkerResult = undefined
+  }
+
+  /** Returns a new, uninitialised processor sharing the same options. */
+  clone() {
+    return new FaceLandmarksProcessor(this.options)
+  }
+
+  /** Returns the processor type and options. */
+  serialize() {
+    return {
+      type: this.type,
+      options: this.options,
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/frontend/src/features/rooms/livekit/components/blur/index.ts b/src/frontend/src/features/rooms/livekit/components/blur/index.ts
index 893b9eb0..61220587 100644
--- a/src/frontend/src/features/rooms/livekit/components/blur/index.ts
+++ b/src/frontend/src/features/rooms/livekit/components/blur/index.ts
@@ -3,10 +3,12 @@ import { Track, TrackProcessor } from 'livekit-client'
 import { BackgroundBlurTrackProcessorJsWrapper } from './BackgroundBlurTrackProcessorJsWrapper'
 import { BackgroundCustomProcessor } from './BackgroundCustomProcessor'
 import {
BackgroundVirtualTrackProcessorJsWrapper } from './BackgroundVirtualTrackProcessorJsWrapper' +import { FaceLandmarksProcessor } from './FaceLandmarksProcessor' export type BackgroundOptions = { blurRadius?: number imagePath?: string + showFaceLandmarks?: boolean } export interface ProcessorSerialized { @@ -25,11 +27,12 @@ export interface BackgroundProcessorInterface export enum ProcessorType { BLUR = 'blur', VIRTUAL = 'virtual', + FACE_LANDMARKS = 'faceLandmarks' } export class BackgroundProcessorFactory { static isSupported() { - return ProcessorWrapper.isSupported || BackgroundCustomProcessor.isSupported + return ProcessorWrapper.isSupported || BackgroundCustomProcessor.isSupported || FaceLandmarksProcessor.isSupported } static getProcessor( @@ -50,6 +53,10 @@ export class BackgroundProcessorFactory { if (BackgroundCustomProcessor.isSupported) { return new BackgroundCustomProcessor(opts) } + } else if (type === ProcessorType.FACE_LANDMARKS) { + if (FaceLandmarksProcessor.isSupported) { + return new FaceLandmarksProcessor(opts) + } } return undefined } diff --git a/src/frontend/src/features/rooms/livekit/components/effects/EffectsConfiguration.tsx b/src/frontend/src/features/rooms/livekit/components/effects/EffectsConfiguration.tsx index b5765ded..c0a59ef3 100644 --- a/src/frontend/src/features/rooms/livekit/components/effects/EffectsConfiguration.tsx +++ b/src/frontend/src/features/rooms/livekit/components/effects/EffectsConfiguration.tsx @@ -15,7 +15,7 @@ import { BlurOnStrong } from '@/components/icons/BlurOnStrong' import { useTrackToggle } from '@livekit/components-react' import { Loader } from '@/primitives/Loader' import { useSyncAfterDelay } from '@/hooks/useSyncAfterDelay' -import { RiProhibited2Line } from '@remixicon/react' +import { RiProhibited2Line, RiUserVoiceLine } from '@remixicon/react' enum BlurRadius { NONE = 0, @@ -302,6 +302,49 @@ export const EffectsConfiguration = ({ +
+ + {t('faceLandmarks.title')} + +
+ + await toggleEffect(ProcessorType.FACE_LANDMARKS, { + blurRadius: 0, + }) + } + isSelected={isSelected(ProcessorType.FACE_LANDMARKS, { + blurRadius: 0, + })} + data-attr="toggle-face-landmarks" + > + + +
+