gemini/multimodal-live-api/websocket-demo-app/frontend/live-media-manager.js (240 lines of code) (raw):
/**
 * Plays base64-encoded 16-bit little-endian PCM chunks by converting them to
 * Float32 samples and posting them to an AudioWorklet node registered under
 * the name "pcm-processor" (loaded from "pcm-processor.js").
 */
class LiveAudioOutputManager {
  constructor() {
    // The context created here feeds the audio destination (24 kHz); the
    // property name is kept as-is for backward compatibility.
    this.audioInputContext = undefined;
    this.workletNode = undefined;
    this.initialized = false;
    // Not read anywhere in this class; kept in case outside code reads them.
    this.audioQueue = [];
    this.isPlaying = false;
    // In-flight initialization, shared by every caller that arrives before
    // it settles so only one AudioContext is ever created at a time.
    this.initPromise = null;

    // A constructor cannot await, so attach a handler instead of leaving the
    // promise floating (which would surface as an unhandled rejection).
    this.initializeAudioContext().catch((error) => {
      console.error("Error initializing audio context:", error);
    });
  }

  /**
   * Decodes one audio chunk and forwards it to the worklet for playback.
   * Errors are logged rather than thrown so a bad chunk does not break the
   * caller's stream handling.
   *
   * @param {string} base64AudioChunk - Base64 text of 16-bit LE PCM samples.
   * @returns {Promise<void>}
   */
  async playAudioChunk(base64AudioChunk) {
    try {
      if (!this.initialized) {
        await this.initializeAudioContext();
      }
      if (this.audioInputContext.state === "suspended") {
        await this.audioInputContext.resume();
      }
      const arrayBuffer =
        LiveAudioOutputManager.base64ToArrayBuffer(base64AudioChunk);
      const float32Data =
        LiveAudioOutputManager.convertPCM16LEToFloat32(arrayBuffer);
      this.workletNode.port.postMessage(float32Data);
    } catch (error) {
      console.error("Error processing audio chunk:", error);
    }
  }

  /**
   * Creates the AudioContext and worklet node once. Concurrent callers share
   * the same in-flight promise; after a failure the next call retries.
   *
   * @returns {Promise<void>} Settles when initialization finished or failed.
   */
  async initializeAudioContext() {
    if (this.initialized) return;
    if (!this.initPromise) {
      this.initPromise = this.setUpAudioGraph().finally(() => {
        // Clear the slot so a failed attempt can be retried later.
        this.initPromise = null;
      });
    }
    await this.initPromise;
  }

  /**
   * Internal helper: builds the context -> worklet -> destination graph.
   * Call initializeAudioContext() instead of calling this directly.
   *
   * @returns {Promise<void>}
   */
  async setUpAudioGraph() {
    console.log("initializeAudioContext...");
    const AudioContextClass = window.AudioContext || window.webkitAudioContext;
    this.audioInputContext = new AudioContextClass({ sampleRate: 24000 });
    await this.audioInputContext.audioWorklet.addModule("pcm-processor.js");
    this.workletNode = new AudioWorkletNode(
      this.audioInputContext,
      "pcm-processor",
    );
    this.workletNode.connect(this.audioInputContext.destination);
    this.initialized = true;
    console.log("initializeAudioContext end");
  }

  /**
   * Decodes base64 text into raw bytes.
   *
   * @param {string} base64 - Base64-encoded data.
   * @returns {ArrayBuffer} The decoded bytes.
   */
  static base64ToArrayBuffer(base64) {
    const binaryString = atob(base64);
    const bytes = new Uint8Array(binaryString.length);
    for (let i = 0; i < binaryString.length; i++) {
      bytes[i] = binaryString.charCodeAt(i);
    }
    return bytes.buffer;
  }

  /**
   * Converts 16-bit little-endian PCM to floats in [-1, 1).
   *
   * Samples are read through a DataView so the byte order is explicit, and a
   * trailing odd byte (an incomplete sample) is ignored instead of throwing.
   *
   * @param {ArrayBuffer} pcmData - Raw PCM bytes.
   * @returns {Float32Array} One float per complete 16-bit sample.
   */
  static convertPCM16LEToFloat32(pcmData) {
    const view = new DataView(pcmData);
    const sampleCount = Math.floor(view.byteLength / 2);
    const float32Array = new Float32Array(sampleCount);
    for (let i = 0; i < sampleCount; i++) {
      float32Array[i] = view.getInt16(i * 2, true) / 32768;
    }
    return float32Array;
  }
}
/**
 * Captures microphone audio at 16 kHz mono, converts it to 16-bit
 * little-endian PCM and hands it to `onNewAudioRecordingChunk` as base64 text
 * once per second.
 */
class LiveAudioInputManager {
  constructor() {
    this.audioContext = undefined;
    this.mediaRecorder = undefined; // not used by this class
    this.processor = false;
    // MediaStreamAudioSourceNode of the active connection, kept for teardown.
    this.source = null;
    // Int16 sample values accumulated since the last emitted chunk.
    this.pcmData = [];
    this.deviceId = null;
    this.interval = null;
    this.stream = null;
    // Callers replace this to receive each base64-encoded chunk.
    this.onNewAudioRecordingChunk = (audioData) => {
      console.log("New audio recording ");
    };
  }

  /**
   * Converts float samples to 16-bit integers. Values are clamped to
   * [-1, 1] first, because storing an out-of-range product in an Int16Array
   * wraps around instead of saturating.
   *
   * @param {Float32Array} samples - Float audio samples.
   * @returns {Int16Array} Samples scaled by 0x7fff.
   */
  static float32ToPCM16(samples) {
    const pcm16 = new Int16Array(samples.length);
    for (let i = 0; i < samples.length; i++) {
      const clamped = Math.max(-1, Math.min(1, samples[i]));
      pcm16[i] = clamped * 0x7fff;
    }
    return pcm16;
  }

  /**
   * Base64-encodes bytes. The binary string is built in slices because
   * `String.fromCharCode.apply` passes one argument per byte and engines cap
   * the argument count.
   *
   * @param {Uint8Array} bytes - Bytes to encode.
   * @returns {string} Base64 text.
   */
  static bytesToBase64(bytes) {
    const sliceSize = 0x8000;
    let binary = "";
    for (let offset = 0; offset < bytes.length; offset += sliceSize) {
      binary += String.fromCharCode.apply(
        null,
        bytes.subarray(offset, offset + sliceSize),
      );
    }
    return btoa(binary);
  }

  /**
   * Opens the microphone (honouring `this.deviceId` when set) and starts
   * emitting one chunk per second. An existing connection is torn down first
   * so repeated calls do not stack timers or audio graphs.
   *
   * @returns {Promise<void>}
   * @throws Whatever `getUserMedia` rejects with (e.g. permission denied).
   */
  async connectMicrophone() {
    if (this.processor || this.interval !== null) {
      this.disconnectMicrophone();
    }
    this.audioContext = new AudioContext({
      sampleRate: 16000,
    });
    const constraints = {
      audio: {
        channelCount: 1,
        sampleRate: 16000,
      },
    };
    if (this.deviceId) {
      constraints.audio.deviceId = { exact: this.deviceId };
    }
    try {
      this.stream = await navigator.mediaDevices.getUserMedia(constraints);
    } catch (error) {
      // Do not leak the freshly created context when capture is refused.
      this.audioContext.close().catch((closeError) => {
        console.error("Error closing audio context", closeError);
      });
      throw error;
    }
    this.source = this.audioContext.createMediaStreamSource(this.stream);
    this.processor = this.audioContext.createScriptProcessor(4096, 1, 1);
    this.processor.onaudioprocess = (e) => {
      const pcm16 = LiveAudioInputManager.float32ToPCM16(
        e.inputBuffer.getChannelData(0),
      );
      for (const sample of pcm16) {
        this.pcmData.push(sample);
      }
    };
    this.source.connect(this.processor);
    this.processor.connect(this.audioContext.destination);
    this.interval = setInterval(this.recordChunk.bind(this), 1000);
  }

  /**
   * Forwards an encoded chunk to the registered callback.
   *
   * @param {string} b64AudioData - Base64 text of 16-bit LE PCM samples.
   */
  newAudioRecording(b64AudioData) {
    console.log("newAudioRecording ");
    this.onNewAudioRecordingChunk(b64AudioData);
  }

  /**
   * Encodes everything buffered in `pcmData` as little-endian 16-bit PCM,
   * emits it as base64 and resets the buffer.
   */
  recordChunk() {
    const buffer = new ArrayBuffer(this.pcmData.length * 2);
    const view = new DataView(buffer);
    this.pcmData.forEach((value, index) => {
      view.setInt16(index * 2, value, true);
    });
    const base64 = LiveAudioInputManager.bytesToBase64(new Uint8Array(buffer));
    this.newAudioRecording(base64);
    this.pcmData = [];
  }

  /**
   * Stops the chunk timer, tears down the audio graph, releases the
   * microphone tracks and closes the context. Safe to call when nothing is
   * connected.
   */
  disconnectMicrophone() {
    clearInterval(this.interval);
    this.interval = null;
    try {
      if (this.processor) {
        this.processor.onaudioprocess = null;
        this.processor.disconnect();
      }
      if (this.source) {
        this.source.disconnect();
      }
      if (this.stream) {
        // Without stopping the tracks the browser keeps the mic captured.
        this.stream.getTracks().forEach((track) => track.stop());
      }
      if (this.audioContext && this.audioContext.state !== "closed") {
        // close() is asynchronous; handle its rejection explicitly.
        this.audioContext.close().catch((error) => {
          console.error("Error closing audio context", error);
        });
      }
    } catch (error) {
      console.error("Error disconnecting microphone", error);
    }
    this.processor = false;
    this.source = null;
    this.stream = null;
  }

  /**
   * Switches capture to another input device.
   *
   * @param {string} deviceId - Device id to request with an `exact` constraint.
   * @returns {Promise<void>} Resolves once the new device is connected.
   */
  async updateMicrophoneDevice(deviceId) {
    this.deviceId = deviceId;
    this.disconnectMicrophone();
    await this.connectMicrophone();
  }
}
/**
 * Shows a webcam stream in a preview <video> element and, once per second,
 * captures the current frame as base64 JPEG and passes it to `onNewFrame`.
 */
class LiveVideoManager {
  /**
   * @param {HTMLVideoElement} previewVideoElement - Element that displays the stream.
   * @param {HTMLCanvasElement} previewCanvasElement - Canvas used to grab frames.
   */
  constructor(previewVideoElement, previewCanvasElement) {
    this.previewVideoElement = previewVideoElement;
    this.previewCanvasElement = previewCanvasElement;
    this.ctx = this.previewCanvasElement.getContext("2d");
    this.stream = null;
    // Handle of the frame-capture timer, or null when no timer is running.
    this.interval = null;
    // Callers replace this to receive each captured frame.
    this.onNewFrame = (newFrame) => {
      console.log("Default new frame trigger.");
    };
  }

  /**
   * Requests the default webcam and starts the one-second capture timer.
   * The timer is started even when the camera request fails, because
   * `updateWebcamDevice` can attach a stream later and relies on this timer.
   *
   * @returns {Promise<void>}
   */
  async startWebcam() {
    try {
      // To cap the resolution, replace `true` with e.g.
      // { width: { max: 640 }, height: { max: 480 } }.
      const constraints = {
        video: true,
      };
      const newStream =
        await navigator.mediaDevices.getUserMedia(constraints);
      // Release a stream left over from an earlier start before replacing it.
      this.stopStream();
      this.stream = newStream;
      this.previewVideoElement.srcObject = this.stream;
    } catch (err) {
      console.error("Error accessing the webcam: ", err);
    }
    // Keep the handle so stopWebcam() can actually cancel the timer, and
    // clear any previous one so repeated starts do not stack timers.
    clearInterval(this.interval);
    this.interval = setInterval(this.newFrame.bind(this), 1000);
  }

  /** Stops the capture timer and releases the camera. */
  stopWebcam() {
    clearInterval(this.interval);
    this.interval = null;
    this.stopStream();
  }

  /** Stops every track of the current stream and forgets the stream. */
  stopStream() {
    if (!this.stream) return;
    const tracks = this.stream.getTracks();
    tracks.forEach((track) => {
      track.stop();
    });
    // A stopped stream yields no new frames; drop it so captureFrameB64()
    // reports "no stream" instead of re-encoding stale content.
    this.stream = null;
  }

  /**
   * Switches the preview to a specific camera. The previous stream is only
   * released once the new one has been obtained.
   *
   * @param {string} deviceId - Device id to request with an `exact` constraint.
   * @returns {Promise<void>}
   * @throws Whatever `getUserMedia` rejects with.
   */
  async updateWebcamDevice(deviceId) {
    const constraints = {
      video: { deviceId: { exact: deviceId } },
    };
    const newStream = await navigator.mediaDevices.getUserMedia(constraints);
    this.stopStream();
    this.stream = newStream;
    this.previewVideoElement.srcObject = this.stream;
  }

  /**
   * Draws the current video frame onto the canvas and encodes it as JPEG.
   *
   * @returns {string} Base64 payload of the JPEG data URL, or "" when there
   *   is no active stream.
   */
  captureFrameB64() {
    if (this.stream == null) return "";
    // Resize the canvas to the video's intrinsic size before drawing.
    this.previewCanvasElement.width = this.previewVideoElement.videoWidth;
    this.previewCanvasElement.height = this.previewVideoElement.videoHeight;
    this.ctx.drawImage(
      this.previewVideoElement,
      0,
      0,
      this.previewCanvasElement.width,
      this.previewCanvasElement.height,
    );
    // A data URL is "<header>,<payload>"; keep only the payload.
    const imageData = this.previewCanvasElement
      .toDataURL("image/jpeg")
      .split(",")[1]
      .trim();
    return imageData;
  }

  /** Timer callback: captures a frame and passes it to `onNewFrame`. */
  newFrame() {
    console.log("capturing new frame");
    const frameData = this.captureFrameB64();
    this.onNewFrame(frameData);
  }
}
/**
 * Shows a screen-capture stream in a preview <video> element and, once per
 * second, captures the current frame as base64 JPEG and passes it to
 * `onNewFrame`.
 */
class LiveScreenManager {
  /**
   * @param {HTMLVideoElement} previewVideoElement - Element that displays the stream.
   * @param {HTMLCanvasElement} previewCanvasElement - Canvas used to grab frames.
   */
  constructor(previewVideoElement, previewCanvasElement) {
    this.previewVideoElement = previewVideoElement;
    this.previewCanvasElement = previewCanvasElement;
    this.ctx = this.previewCanvasElement.getContext("2d");
    this.stream = null;
    // Handle of the frame-capture timer, or null when no timer is running.
    this.interval = null;
    // Callers replace this to receive each captured frame.
    this.onNewFrame = (newFrame) => {
      console.log("Default new frame trigger: ", newFrame);
    };
  }

  /**
   * Asks the user for a display to share and starts the one-second capture
   * timer. As before, the timer runs even when the request fails; in that
   * case every tick delivers "" because there is no stream.
   *
   * @returns {Promise<void>}
   */
  async startCapture() {
    try {
      const newStream = await navigator.mediaDevices.getDisplayMedia();
      // Release a capture left over from an earlier start before replacing it.
      this.stopTracks();
      this.stream = newStream;
      this.previewVideoElement.srcObject = this.stream;
    } catch (err) {
      console.error("Error starting screen capture: ", err);
    }
    // Keep the handle so stopCapture() can actually cancel the timer, and
    // clear any previous one so repeated starts do not stack timers.
    clearInterval(this.interval);
    this.interval = setInterval(this.newFrame.bind(this), 1000);
  }

  /** Stops the capture timer and ends the screen share. */
  stopCapture() {
    clearInterval(this.interval);
    this.interval = null;
    this.stopTracks();
  }

  /** Internal helper: stops every track of the current stream and forgets it. */
  stopTracks() {
    if (!this.stream) return;
    const tracks = this.stream.getTracks();
    tracks.forEach((track) => {
      track.stop();
    });
    // Drop the stopped stream so captureFrameB64() reports "no stream".
    this.stream = null;
  }

  /**
   * Draws the current video frame onto the canvas and encodes it as JPEG.
   *
   * @returns {string} Base64 payload of the JPEG data URL, or "" when there
   *   is no active stream.
   */
  captureFrameB64() {
    if (this.stream == null) return "";
    // Resize the canvas to the video's intrinsic size before drawing.
    this.previewCanvasElement.width = this.previewVideoElement.videoWidth;
    this.previewCanvasElement.height = this.previewVideoElement.videoHeight;
    this.ctx.drawImage(
      this.previewVideoElement,
      0,
      0,
      this.previewCanvasElement.width,
      this.previewCanvasElement.height,
    );
    // A data URL is "<header>,<payload>"; keep only the payload.
    const imageData = this.previewCanvasElement
      .toDataURL("image/jpeg")
      .split(",")[1]
      .trim();
    return imageData;
  }

  /** Timer callback: captures a frame and passes it to `onNewFrame`. */
  newFrame() {
    console.log("capturing new frame");
    const frameData = this.captureFrameB64();
    this.onNewFrame(frameData);
  }
}
// Load marker: printed when execution reaches this point of the script.
console.log("loaded live-media-manager.js");