9-realtime/realtime-from-scratch/app/hooks/useAudioPlayer.ts (121 lines of code) (raw):

import { useState, useCallback, useRef, useEffect } from "react";

/** Audio encodings the player can decode. */
type AudioFormat = "mu-law" | "pcm-16";

interface useAudioPlayerOptions {
  /** Accepted for compatibility; this implementation does not read it. */
  bufferThreshold?: number;
  /** Format assumed by `addChunk` when none is passed. Defaults to "pcm-16". */
  defaultFormat?: AudioFormat;
}

interface useAudioPlayerReturn {
  /** True from the moment the worklet node is connected until `stop()` is called. */
  isPlaying: boolean;
  /** Decodes a base64 audio chunk and appends it to the playback buffer. */
  addChunk: (base64Chunk: string, format?: AudioFormat) => void;
  /** Disconnects the worklet node and closes the AudioContext. */
  stop: () => void;
}

/** Bias constant of the G.711 mu-law expansion. */
const ULAW_BIAS = 0x84;

/** Name under which the worklet processor is registered and instantiated. */
const WORKLET_PROCESSOR_NAME = "buffer-processor";

/**
 * Source of the AudioWorklet processor. It appends every Float32Array it
 * receives on its port to an internal buffer and drains that buffer into
 * channel 0 of its first output on each render call.
 */
const WORKLET_SOURCE = `
  class BufferProcessor extends AudioWorkletProcessor {
    constructor() {
      super();
      this.buffer = new Float32Array(0);
      this.port.onmessage = (event) => {
        const incoming = event.data;
        const merged = new Float32Array(this.buffer.length + incoming.length);
        merged.set(this.buffer);
        merged.set(incoming, this.buffer.length);
        this.buffer = merged;
      };
    }

    process(inputs, outputs) {
      const channel = outputs[0] && outputs[0][0];
      if (!channel) return true;

      // Play whatever is buffered, even a partial frame, and pad the rest
      // with silence so the tail of a stream is not held back forever.
      const available = Math.min(this.buffer.length, channel.length);
      channel.set(this.buffer.subarray(0, available));
      if (available < channel.length) channel.fill(0, available);
      this.buffer = this.buffer.subarray(available);
      return true;
    }
  }

  registerProcessor('${WORKLET_PROCESSOR_NAME}', BufferProcessor);
`;

/**
 * Expands one 8-bit mu-law code into a signed linear PCM sample.
 *
 * @param ulawByte - The encoded byte (only the low 8 bits are used).
 * @returns The decoded sample as a signed integer.
 */
function ulawDecode(ulawByte: number): number {
  // mu-law bytes are stored bit-inverted.
  const inverted = ~ulawByte & 0xff;
  const mantissa = ((inverted & 0x0f) << 3) + ULAW_BIAS;
  const magnitude = mantissa << ((inverted & 0x70) >> 4);
  return inverted & 0x80 ? ULAW_BIAS - magnitude : magnitude - ULAW_BIAS;
}

/**
 * Decodes a base64 payload into samples normalized by 32768.
 *
 * @param base64 - Base64-encoded audio bytes.
 * @param format - "mu-law" (one byte per sample) or "pcm-16"
 *   (two bytes per sample, little endian, signed).
 * @returns The decoded samples.
 * @throws Error if `format` is neither supported value; `atob` throws if
 *   `base64` is not valid base64.
 */
function base64ToFloat32Array(base64: string, format: AudioFormat): Float32Array {
  const binary = atob(base64);

  if (format === "mu-law") {
    const decoded = new Float32Array(binary.length);
    for (let i = 0; i < decoded.length; i++) {
      decoded[i] = ulawDecode(binary.charCodeAt(i)) / 32768;
    }
    return decoded;
  }

  if (format === "pcm-16") {
    // A trailing odd byte cannot form a sample and is ignored.
    const decoded = new Float32Array(Math.floor(binary.length / 2));
    for (let i = 0; i < decoded.length; i++) {
      const low = binary.charCodeAt(i * 2);
      const high = binary.charCodeAt(i * 2 + 1);
      let sample = (high << 8) | low;
      if (sample >= 0x8000) sample -= 0x10000; // reinterpret as signed 16-bit
      decoded[i] = sample / 32768;
    }
    return decoded;
  }

  throw new Error("Unsupported audio format");
}

/**
 * Registers the buffer processor on `context`.
 *
 * @returns `true` when the module loaded, `false` when loading failed
 *   (the failure is logged, not thrown).
 */
async function loadWorkletModule(context: AudioContext): Promise<boolean> {
  const blob = new Blob([WORKLET_SOURCE], { type: "application/javascript" });
  const moduleURL = URL.createObjectURL(blob);
  try {
    await context.audioWorklet.addModule(moduleURL);
    return true;
  } catch (error: unknown) {
    console.error("Error loading audio worklet module:", error);
    return false;
  } finally {
    // The blob URL is only needed while the module is being fetched.
    URL.revokeObjectURL(moduleURL);
  }
}

/** Closes `context` in the background, logging instead of rejecting. */
function closeAudioContext(context: AudioContext): void {
  if (context.state === "closed") return;
  context.close().catch((error: unknown) => {
    console.error("Error closing AudioContext:", error);
  });
}

/**
 * React hook that plays streamed base64 audio chunks through an AudioWorklet.
 *
 * "mu-law" chunks are played at 8000 Hz and every other format at 24000 Hz;
 * switching rate replaces the AudioContext. Chunks that arrive while the
 * worklet module is still loading wait for it and are then queued in arrival
 * order. Chunks are dropped when the module fails to load or when `stop()`
 * is called before they could be queued.
 *
 * @param options - Optional settings; only `defaultFormat` is read.
 * @returns The playing flag plus `addChunk` and `stop` callbacks.
 */
export function useAudioPlayer(
  options: useAudioPlayerOptions = {}
): useAudioPlayerReturn {
  const { defaultFormat = "pcm-16" } = options;
  const [isPlaying, setIsPlaying] = useState<boolean>(false);
  const audioContextRef = useRef<AudioContext | null>(null);
  const workletNodeRef = useRef<AudioWorkletNode | null>(null);
  // Module-load promise belonging to the context in audioContextRef; shared
  // by all callers so none of them creates a node before the load settles.
  const moduleReadyRef = useRef<Promise<boolean> | null>(null);

  // Resolves to the context once it can host the worklet node, or to null
  // when the module failed to load or the context was replaced meanwhile.
  const initAudioContext = useCallback(
    async (sampleRate: number): Promise<AudioContext | null> => {
      let context = audioContextRef.current;

      if (context && context.sampleRate !== sampleRate) {
        // Drop the refs synchronously so concurrent callers neither reuse
        // nor re-close the outgoing context.
        audioContextRef.current = null;
        workletNodeRef.current = null;
        moduleReadyRef.current = null;
        closeAudioContext(context);
        context = null;
      }

      if (!context) {
        console.log("Creating new AudioContext with sample rate:", sampleRate);
        context = new AudioContext({ sampleRate });
        audioContextRef.current = context;
        moduleReadyRef.current = loadWorkletModule(context);
      }

      const ready = await moduleReadyRef.current;
      return ready && audioContextRef.current === context ? context : null;
    },
    []
  );

  // Creates and connects the worklet node once per context.
  const startPlayback = useCallback(() => {
    const context = audioContextRef.current;
    if (!context || workletNodeRef.current) return;

    const node = new AudioWorkletNode(context, WORKLET_PROCESSOR_NAME);
    node.connect(context.destination);
    workletNodeRef.current = node;
    setIsPlaying(true);
  }, []);

  const addChunk = useCallback(
    (base64Chunk: string, format?: AudioFormat) => {
      // `||` (not `??`) so an empty string also falls back to the default.
      const audioFormat = format || defaultFormat;
      const sampleRate = audioFormat === "mu-law" ? 8000 : 24000;

      initAudioContext(sampleRate)
        .then((context) => {
          if (!context) return;
          const samples = base64ToFloat32Array(base64Chunk, audioFormat);
          if (!workletNodeRef.current) {
            startPlayback();
          }
          // Hand the samples to the worklet for playback.
          workletNodeRef.current?.port.postMessage(samples);
        })
        .catch((error: unknown) => {
          console.error("Error queueing audio chunk:", error);
        });
    },
    [initAudioContext, startPlayback, defaultFormat]
  );

  const stop = useCallback(() => {
    if (workletNodeRef.current) {
      workletNodeRef.current.disconnect();
      workletNodeRef.current = null;
    }
    const context = audioContextRef.current;
    if (context) {
      audioContextRef.current = null;
      closeAudioContext(context);
    }
    moduleReadyRef.current = null;
    setIsPlaying(false);
  }, []);

  // Release audio resources when the component using the hook unmounts.
  useEffect(() => {
    return () => {
      stop();
    };
  }, [stop]);

  return { isPlaying, addChunk, stop };
}