9-realtime/realtime-from-scratch/app/hooks/useMicrophone.ts (48 lines of code) (raw):
import { useState, useCallback, useRef, useEffect } from "react";
/** Configuration options for the `useMicrophone` hook. */
interface UseMicrophoneOptions {
/** ScriptProcessor buffer size in sample frames. Defaults to 4096. */
chunkSize?: number;
/** Sample rate (Hz) the captured audio is downsampled to. Defaults to 24000. */
targetSampleRate?: number;
}
/** Receives one chunk of captured audio as base64-encoded 16-bit little-endian PCM. */
type AudioChunkHandler = (chunk: string) => void;
/** Public surface returned by `useMicrophone`. */
interface UseMicrophoneReturn {
/** True while the microphone is capturing. */
isListening: boolean;
/** Requests mic access and starts streaming chunks to registered handlers. */
startListening: () => Promise<void>;
/** Stops capture and releases the audio resources. */
stopListening: () => void;
/** Subscribes a handler to receive base64 audio chunks. */
addAudioChunkHandler: (handler: AudioChunkHandler) => void;
/** Unsubscribes a previously added handler. */
removeAudioChunkHandler: (handler: AudioChunkHandler) => void;
}
/**
 * React hook that captures microphone audio, downsamples it to
 * `targetSampleRate`, converts the float samples to 16-bit PCM, and delivers
 * each chunk to every registered handler as a base64 string.
 *
 * NOTE(review): ScriptProcessorNode is deprecated in favor of AudioWorklet;
 * it is kept here because migrating changes the processing/threading model.
 *
 * @param chunkSize        ScriptProcessor buffer size in frames (default 4096).
 * @param targetSampleRate Output sample rate in Hz (default 24000).
 */
export function useMicrophone({
  chunkSize = 4096,
  targetSampleRate = 24000,
}: UseMicrophoneOptions = {}): UseMicrophoneReturn {
  const [isListening, setIsListening] = useState<boolean>(false);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const processorRef = useRef<ScriptProcessorNode | null>(null);
  // Handlers live in a ref so add/remove never triggers a re-render and the
  // onaudioprocess closure always sees the current set.
  const handlersRef = useRef<Set<AudioChunkHandler>>(new Set());

  /**
   * Resample `inputBuffer` from `inputSampleRate` to `outputSampleRate` by
   * averaging each window of adjacent samples. Intended for downsampling
   * (input rate >= output rate); returns the buffer unchanged when the rates
   * already match.
   */
  const downsample = (
    inputBuffer: Float32Array,
    inputSampleRate: number,
    outputSampleRate: number
  ): Float32Array => {
    if (inputSampleRate === outputSampleRate) {
      return inputBuffer;
    }
    const ratio = inputSampleRate / outputSampleRate;
    const outputLength = Math.round(inputBuffer.length / ratio);
    const result = new Float32Array(outputLength);
    let offsetResult = 0;
    let offsetBuffer = 0;
    while (offsetResult < result.length) {
      const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
      let accum = 0;
      let count = 0;
      for (
        let i = offsetBuffer;
        i < nextOffsetBuffer && i < inputBuffer.length;
        i++
      ) {
        accum += inputBuffer[i];
        count++;
      }
      // Fix: the original divided by zero (producing NaN samples) whenever a
      // window was empty — possible at the tail due to rounding, or if
      // ratio < 1. Fall back to the nearest input sample instead.
      result[offsetResult] =
        count > 0
          ? accum / count
          : inputBuffer[Math.min(offsetBuffer, inputBuffer.length - 1)];
      offsetResult++;
      offsetBuffer = nextOffsetBuffer;
    }
    return result;
  };

  /**
   * Downsample one capture buffer, clamp float samples to [-1, 1], convert to
   * 16-bit PCM, and base64-encode the raw bytes.
   */
  const processAudioChunk = (
    inputBuffer: Float32Array,
    sampleRate: number
  ): string => {
    const downsampledBuffer = downsample(
      inputBuffer,
      sampleRate,
      targetSampleRate
    );
    const output = new Int16Array(downsampledBuffer.length);
    for (let i = 0; i < downsampledBuffer.length; i++) {
      const s = Math.max(-1, Math.min(1, downsampledBuffer[i]));
      // Asymmetric scaling: -1 maps to -0x8000, +1 maps to +0x7fff.
      output[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
    }
    // Fix: build the binary string in bounded slices. The original spread the
    // entire buffer through fromCharCode.apply, which can overflow the call
    // stack for large chunk sizes (and Array.from made a needless copy).
    const bytes = new Uint8Array(output.buffer);
    const SLICE = 0x8000;
    let binary = "";
    for (let i = 0; i < bytes.length; i += SLICE) {
      binary += String.fromCharCode(...bytes.subarray(i, i + SLICE));
    }
    return btoa(binary);
  };

  const startListening = useCallback(async (): Promise<void> => {
    // Fix: guard re-entry — a second call while already capturing would leak
    // the first MediaStream and AudioContext.
    if (mediaStreamRef.current) {
      return;
    }
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      mediaStreamRef.current = stream;
      audioContextRef.current = new AudioContext();
      const source = audioContextRef.current.createMediaStreamSource(stream);
      const processor = audioContextRef.current.createScriptProcessor(
        chunkSize,
        1, // mono input
        1 // mono output (never written — produces silence)
      );
      processorRef.current = processor;
      processor.onaudioprocess = (event: AudioProcessingEvent) => {
        const inputBuffer = event.inputBuffer.getChannelData(0);
        const base64Chunk = processAudioChunk(
          inputBuffer,
          audioContextRef.current!.sampleRate
        );
        handlersRef.current.forEach((handler) => handler(base64Chunk));
      };
      source.connect(processor);
      // Connecting to destination keeps onaudioprocess firing; the processor
      // never writes its output buffer, so nothing audible plays back.
      processor.connect(audioContextRef.current.destination);
      setIsListening(true);
    } catch (error) {
      console.error("Error starting microphone:", error);
      // Fix: release anything acquired before the failure so a retry starts
      // clean (the original left a live stream/context behind on error).
      mediaStreamRef.current?.getTracks().forEach((track) => track.stop());
      mediaStreamRef.current = null;
      processorRef.current = null;
      if (audioContextRef.current && audioContextRef.current.state !== "closed") {
        void audioContextRef.current.close();
      }
      audioContextRef.current = null;
    }
  }, [chunkSize, targetSampleRate]);

  const stopListening = useCallback((): void => {
    mediaStreamRef.current?.getTracks().forEach((track) => track.stop());
    mediaStreamRef.current = null;
    processorRef.current?.disconnect();
    processorRef.current = null;
    // Fix: null the refs and guard close() so calling stopListening twice
    // (e.g. a manual stop followed by the unmount cleanup effect) does not
    // throw on an already-closed AudioContext.
    if (audioContextRef.current && audioContextRef.current.state !== "closed") {
      void audioContextRef.current.close();
    }
    audioContextRef.current = null;
    setIsListening(false);
  }, []);

  const addAudioChunkHandler = useCallback(
    (handler: AudioChunkHandler): void => {
      handlersRef.current.add(handler);
    },
    []
  );

  const removeAudioChunkHandler = useCallback(
    (handler: AudioChunkHandler): void => {
      handlersRef.current.delete(handler);
    },
    []
  );

  // Release the microphone and audio graph when the component unmounts.
  useEffect(() => {
    return () => {
      stopListening();
    };
  }, [stopListening]);

  return {
    isListening,
    startListening,
    stopListening,
    addAudioChunkHandler,
    removeAudioChunkHandler,
  };
}