in conversational-webgpu/src/worker.js [121:136]
/**
 * Run voice-activity detection on a single chunk of audio.
 *
 * The chunk is wrapped in a float32 tensor of shape `[1, buffer.length]` and
 * passed to `silero_vad` together with the outer-scope `sr` and `state`.
 * The `stateN` value the model returns is written back to the outer-scope
 * `state`, so each call feeds the state produced by the previous one.
 *
 * @param {Float32Array} buffer - Audio samples for this chunk.
 *   NOTE(review): exact type assumed from the "float32" tensor dtype — confirm against callers.
 * @returns {Promise<boolean>} Truthy when the chunk should be treated as speech:
 *   either the score exceeds `SPEECH_THRESHOLD`, or a recording is already in
 *   progress and the score is at least `EXIT_THRESHOLD`.
 */
async function vad(buffer) {
  const input = new Tensor("float32", buffer, [1, buffer.length]);
  const result = await silero_vad({ input, sr, state });

  // Carry the model's state over to the next invocation.
  state = result.stateN;

  // First element of the model output is used as the speech score for this chunk.
  const speechScore = result.output.data[0];

  // Above the entry threshold: definitely speech, regardless of recording status.
  if (speechScore > SPEECH_THRESHOLD) {
    return true;
  }

  // Otherwise only keep treating the chunk as speech while a recording is
  // ongoing and the score has not dropped below the (lower) exit threshold.
  return isRecording && speechScore >= EXIT_THRESHOLD;
}