libs/voicefocus/voicefocus.js (325 lines of code) (raw):

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
"use strict";

// Compiler-emitted helper: drives a generator function as an asynchronous
// function, resolving the returned promise with the generator's return value
// and rejecting it with anything the generator throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.getAudioInput = exports.createAudioContext = exports.VoiceFocus = void 0;
const decider_js_1 = require("./decider.js");
const fetch_js_1 = require("./fetch.js");
const loader_js_1 = require("./loader.js");
const support_js_1 = require("./support.js");
const worklet_inline_node_js_1 = require("./worklet-inline-node.js");
const worklet_worker_sab_node_js_1 = require("./worklet-worker-sab-node.js");
const worklet_worker_postMessage_node_js_1 = require("./worklet-worker-postMessage-node.js");

// AGC setting used whenever the caller does not supply one: Voice Focus AGC
// off, the browser's built-in AGC on.
const DEFAULT_AGC_DISABLED_SETTING = {
    useVoiceFocusAGC: false,
    useBuiltInAGC: true,
};
const DEFAULT_AGC_SETTING = DEFAULT_AGC_DISABLED_SETTING;

const DEFAULT_ASSET_GROUP = 'stable-v1';
const DEFAULT_CDN = 'https://static.sdkassets.chime.aws';

// Base URLs for each kind of asset. Note that models are fetched from the
// same `/wasm/` prefix as the other WASM modules.
const DEFAULT_PATHS = {
    processors: `${DEFAULT_CDN}/processors/`,
    workers: `${DEFAULT_CDN}/workers/`,
    wasm: `${DEFAULT_CDN}/wasm/`,
    models: `${DEFAULT_CDN}/wasm/`,
};

// Options passed to the AudioContext constructor by `createAudioContext`
// when the caller provides none.
const DEFAULT_CONTEXT_HINT = {
    latencyHint: 0,
};

// Audio constraints shared by both AGC variants below.
const BASE_AUDIO_CONSTRAINTS = {
    channelCount: 1,
    echoCancellation: true,
    googEchoCancellation: true,
    noiseSuppression: false,
    googNoiseSuppression: false,
    googHighpassFilter: false,
    googTypingNoiseDetection: false,
};
const DEFAULT_AUDIO_CONSTRAINTS_WITH_BUILTIN_AGC = Object.assign({}, BASE_AUDIO_CONSTRAINTS, {
    autoGainControl: true,
    googAutoGainControl: true,
    googAutoGainControl2: true,
});
const DEFAULT_AUDIO_CONSTRAINTS_WITHOUT_BUILTIN_AGC = Object.assign({}, BASE_AUDIO_CONSTRAINTS, {
    autoGainControl: false,
    googAutoGainControl: false,
    googAutoGainControl2: false,
});

// Maps each known processor name to the worklet script that implements it and
// the node class used to instantiate it.
const PROCESSORS = {
    'voicefocus-worker-sab-processor': {
        file: 'worklet-worker-sab-processor-v1.js',
        node: worklet_worker_sab_node_js_1.default,
    },
    'voicefocus-worker-postMessage-processor': {
        file: 'worklet-worker-postMessage-processor-v1.js',
        node: worklet_worker_postMessage_node_js_1.default,
    },
    'voicefocus-inline-processor': {
        file: 'worklet-inline-processor-v1.js',
        node: worklet_inline_node_js_1.default,
    },
};

/**
 * Validate the asset-selection part of a spec.
 *
 * @param {string|undefined} assetGroup - Asset group name; skipped when undefined.
 * @param {string|undefined} revisionID - Revision ID; skipped when undefined.
 * @throws {Error} If a supplied value is rejected by the `fetch.js` validators.
 */
const validateAssetSpec = (assetGroup, revisionID) => {
    if (assetGroup !== undefined && !fetch_js_1.isValidAssetGroup(assetGroup)) {
        throw new Error(`Invalid asset group ${assetGroup}`);
    }
    if (revisionID !== undefined && !fetch_js_1.isValidRevisionID(revisionID)) {
        throw new Error(`Invalid revision ID ${revisionID}`);
    }
};

/**
 * Produce the `getUserMedia` constraints to use for a Voice Focus input.
 *
 * - No constraints at all: request audio with the defaults for the AGC mode.
 * - Constraints without `audio`: returned untouched.
 * - Constraints with `audio`: the defaults are merged in *last*, so they take
 *   precedence over any caller-supplied value for the same key.
 *
 * @param {object|undefined} constraints - Caller-supplied stream constraints.
 * @param {{useBuiltInAGC: boolean}} agc - AGC settings; selects the default set.
 * @returns {object} Constraints to hand to `getUserMedia`.
 * @throws {Error} If the constraints request both audio and video.
 */
const mungeConstraints = (constraints, agc) => {
    const defaultConstraints = agc.useBuiltInAGC
        ? DEFAULT_AUDIO_CONSTRAINTS_WITH_BUILTIN_AGC
        : DEFAULT_AUDIO_CONSTRAINTS_WITHOUT_BUILTIN_AGC;
    if (!constraints) {
        return { audio: defaultConstraints };
    }
    if (!constraints.audio) {
        return constraints;
    }
    if (constraints.video) {
        throw new Error('Not adding Voice Focus to multi-device getUserMedia call.');
    }
    const audio = constraints.audio === true
        ? defaultConstraints
        : Object.assign({}, constraints.audio, defaultConstraints);
    return Object.assign({}, constraints, { audio });
};

/**
 * Build the URL of the WASM file for a model.
 *
 * @param {object} model - Model descriptor, passed to `decideModel` to get the file stem.
 * @param {{models: string}} paths - Asset base URLs.
 * @returns {string} `<paths.models><decided name>.wasm`
 */
const urlForModel = (model, paths) => {
    return `${paths.models}${decider_js_1.decideModel(model)}.wasm`;
};

/**
 * Factory for Voice Focus audio nodes. Instances are obtained through
 * `VoiceFocus.configure` followed by `VoiceFocus.init`, not by calling the
 * constructor directly.
 */
class VoiceFocus {
    /**
     * @param {Worker} worker - Worker returned by `loadWorker`; terminated by `destroy()`.
     * @param {string} processorURL - URL of the worklet processor script.
     * @param {Function} nodeConstructor - Node class instantiated by `createNode`.
     * @param {object} nodeOptions - Base options passed to every created node.
     * @param {number|undefined} executionQuanta - Forwarded to the processor options.
     */
    constructor(worker, processorURL, nodeConstructor, nodeOptions, executionQuanta) {
        this.processorURL = processorURL;
        this.nodeConstructor = nodeConstructor;
        this.nodeOptions = nodeOptions;
        this.executionQuanta = executionQuanta;
        this.internal = {
            worker,
            nodeOptions,
            isDestroyed: false,
        };
    }

    /**
     * Check whether the current environment can run Voice Focus.
     *
     * Lack of `globalThis`, Audio Worklet or WASM support resolves to `false`.
     * Lack of streaming WASM compilation is only logged. Otherwise the answer
     * is whatever `supportsVoiceFocusWorker` returns.
     *
     * @param {object} [spec] - May carry `assetGroup`, `revisionID`, `paths`.
     * @param {object} [options] - May carry `fetchBehavior` and `logger`.
     * @returns {Promise} Resolves to `false` for the early exits above;
     *   otherwise the result of `supportsVoiceFocusWorker` (presumably a boolean promise).
     * @throws {Error} Synchronously, if the asset group or revision ID is invalid.
     */
    static isSupported(spec, options) {
        const { fetchBehavior, logger } = options || {};
        const debug = (message) => {
            if (logger != null) {
                logger.debug(message);
            }
        };
        if (typeof globalThis === 'undefined') {
            debug('Browser does not have globalThis.');
            return Promise.resolve(false);
        }
        if (!support_js_1.supportsAudioWorklet(globalThis, logger)) {
            debug('Browser does not support Audio Worklet.');
            return Promise.resolve(false);
        }
        if (!support_js_1.supportsWASM(globalThis, logger)) {
            debug('Browser does not support WASM.');
            return Promise.resolve(false);
        }
        if (!support_js_1.supportsWASMStreaming(globalThis, logger)) {
            // Not fatal: only reported.
            debug('Browser does not support streaming WASM compilation.');
        }
        const { assetGroup = DEFAULT_ASSET_GROUP, revisionID, paths = DEFAULT_PATHS, } = spec || {};
        validateAssetSpec(assetGroup, revisionID);
        // A revision ID, when given, takes precedence over the asset group.
        const assetConfig = revisionID ? { revisionID } : { assetGroup };
        const updatedFetchBehavior = fetch_js_1.addQueryParams(fetchBehavior, assetConfig);
        const fetchConfig = Object.assign({}, updatedFetchBehavior, { paths });
        return support_js_1.supportsVoiceFocusWorker(globalThis, fetchConfig, logger);
    }

    /**
     * Normalise the requested execution preference for the current browser.
     *
     * On Safari only `'inline'` is permitted: `undefined`, `'auto'` and
     * `'inline'` all map to `'inline'`, anything else is rejected.
     * Elsewhere, `'worker-sab'` is rejected when SharedArrayBuffer support
     * is missing, and an absent preference becomes `'auto'`.
     *
     * @param {string|undefined} preference - Requested execution preference.
     * @param {object} [logger] - Forwarded to the support check.
     * @returns {string} The effective execution preference.
     * @throws {Error} If the preference cannot be honoured.
     */
    static mungeExecutionPreference(preference, logger) {
        const isAuto = (preference === undefined || preference === 'auto');
        if (support_js_1.isSafari(globalThis)) {
            if (isAuto || preference === 'inline') {
                return 'inline';
            }
            throw new Error(`Unsupported execution preference ${preference}`);
        }
        if (preference === 'worker-sab' && !support_js_1.supportsSharedArrayBuffer(globalThis, globalThis, logger)) {
            throw new Error(`Unsupported execution preference ${preference}`);
        }
        return preference || 'auto';
    }

    /**
     * Validate a spec, decide how Voice Focus should execute, and return a
     * configuration object suitable for `VoiceFocus.init`.
     *
     * @param {object} [spec] - Feature, asset and execution preferences.
     * @param {object} [options] - May carry `fetchBehavior`, `preResolve`, `logger`.
     * @returns {Promise<object>} Either `{ supported: false, reason }` or
     *   `{ supported: true, fetchConfig, model, processor, executionQuanta }`.
     *   Rejects with an `Error` if any spec field has an unrecognized value.
     */
    static configure(spec, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const { fetchBehavior, preResolve, logger, } = options || {};
            const { category = 'voicefocus', name = 'default', variant: variantPreference = 'auto', assetGroup = DEFAULT_ASSET_GROUP, revisionID, simd = 'detect', executionPreference = 'auto', executionQuantaPreference, usagePreference = 'interactivity', estimatorBudget = 100, paths = DEFAULT_PATHS, thresholds, } = spec || {};
            if (logger != null) {
                logger.debug('Configuring Voice Focus with spec', spec);
            }
            if (category !== undefined && category !== 'voicefocus') {
                throw new Error(`Unrecognized category ${category}`);
            }
            if (name !== undefined && name !== 'default' && name !== 'ns_es') {
                throw new Error(`Unrecognized feature name ${name}`);
            }
            if (variantPreference !== undefined && !['auto', 'c100', 'c50', 'c20', 'c10'].includes(variantPreference)) {
                throw new Error(`Unrecognized feature variant ${variantPreference}`);
            }
            if (executionQuantaPreference !== undefined && ![1, 2, 3].includes(executionQuantaPreference)) {
                throw new Error(`Unrecognized execution quanta preference ${executionQuantaPreference}`);
            }
            validateAssetSpec(assetGroup, revisionID);
            if (simd !== undefined && !['detect', 'force', 'disable'].includes(simd)) {
                throw new Error(`Unrecognized SIMD option ${simd}`);
            }
            if (executionPreference !== undefined && !['auto', 'inline', 'worker', 'worker-sab', 'worker-postMessage'].includes(executionPreference)) {
                throw new Error(`Unrecognized execution preference ${executionPreference}`);
            }
            if (usagePreference !== undefined && !['quality', 'interactivity'].includes(usagePreference)) {
                throw new Error(`Unrecognized usage preference ${usagePreference}`);
            }
            const executionSpec = {
                executionPreference: this.mungeExecutionPreference(executionPreference, logger),
                usagePreference,
                executionQuantaPreference,
                variantPreference,
                namePreference: name,
                simdPreference: simd,
                estimatorBudget,
            };
            // A revision ID, when given, takes precedence over the asset group.
            const assetConfig = revisionID ? { revisionID } : { assetGroup };
            const updatedFetchBehavior = fetch_js_1.addQueryParams(fetchBehavior, assetConfig);
            const fetchConfig = Object.assign({ paths }, updatedFetchBehavior);
            const executionDefinition = yield decider_js_1.measureAndDecideExecutionApproach(executionSpec, fetchConfig, logger, thresholds);
            if (executionDefinition.supported === false) {
                return { supported: false, reason: executionDefinition.reason };
            }
            if (logger != null) {
                logger.info('Decided execution approach', executionDefinition);
            }
            const { useSIMD, processor, variant, executionQuanta } = executionDefinition;
            const model = {
                category: category || 'voicefocus',
                name: name || 'default',
                variant,
                simd: useSIMD,
            };
            if (preResolve) {
                // Resolve the model URL up front so `init` can use it directly.
                const startingURL = urlForModel(model, paths);
                model.url = yield fetch_js_1.resolveURL(startingURL, updatedFetchBehavior);
            }
            return {
                fetchConfig,
                model,
                processor,
                executionQuanta,
                supported: true,
            };
        });
    }

    /**
     * Create a `VoiceFocus` factory from a configuration produced by
     * `configure`: loads the worker and, unless disabled, asks it to preload
     * the model.
     *
     * @param {object} configuration - Result of `VoiceFocus.configure`.
     * @param {object} [options] - May carry `delegate`, `preload` (default
     *   `true`) and `logger`. May be omitted entirely.
     * @returns {Promise<VoiceFocus>} Rejects with an `Error` if the
     *   configuration is unsupported or names an unknown processor.
     */
    static init(configuration, options) {
        return __awaiter(this, void 0, void 0, function* () {
            // Tolerate a missing options object, like `configure` and `isSupported` do.
            const { delegate, preload = true, logger, } = options || {};
            if (configuration.supported === false) {
                throw new Error('Voice Focus not supported. Reason: ' + configuration.reason);
            }
            const { model, processor, fetchConfig, executionQuanta, } = configuration;
            const { simd, name } = model;
            const { paths } = fetchConfig;
            // PROCESSORS is the single source of truth for valid processor names.
            if (typeof processor !== 'string' || !Object.prototype.hasOwnProperty.call(PROCESSORS, processor)) {
                throw new Error(`Unknown processor ${processor}`);
            }
            const modelURL = model.url || urlForModel(model, paths);
            if (logger != null) {
                logger.debug(`Using model URL ${modelURL}.`);
            }
            const simdSuffix = simd ? '_simd' : '';
            const audioBufferURL = `${paths.wasm}audio_buffer-v1${simdSuffix}.wasm`;
            const resamplerURL = `${paths.wasm}resampler-v1${simdSuffix}.wasm`;
            const workerURL = `${paths.workers}worker-v1.js`;
            const { file, node } = PROCESSORS[processor];
            const processorURL = `${paths.processors}${file}`;
            const worker = yield loader_js_1.loadWorker(workerURL, 'VoiceFocusWorker', fetchConfig, logger);
            if (preload) {
                if (logger != null) {
                    logger.debug('Preloading', modelURL);
                }
                // The message type depends on what `supportsWASMPostMessage` reports.
                const message = support_js_1.supportsWASMPostMessage(globalThis) ? 'get-module' : 'get-module-buffer';
                worker.postMessage({
                    message,
                    preload: true,
                    key: 'model',
                    fetchBehavior: fetchConfig,
                    path: modelURL,
                });
            }
            // The 'ns_es' feature takes two inputs; every other feature takes one.
            const numberOfInputs = (name === 'ns_es') ? 2 : 1;
            const nodeOptions = {
                processor,
                worker,
                audioBufferURL,
                resamplerURL,
                fetchBehavior: fetchConfig,
                modelURL,
                delegate,
                logger,
                numberOfInputs,
            };
            return new VoiceFocus(worker, processorURL, node, nodeOptions, executionQuanta);
        });
    }

    /**
     * Register the processor module on the context's audio worklet and
     * construct a Voice Focus node.
     *
     * @param {BaseAudioContext} context - Audio context that will own the node.
     * @param {object} [options] - May carry `voiceFocusSampleRate`, `enabled`,
     *   `agc` and `es`.
     * @returns {Promise<object>} Resolves to the new node instance.
     * @throws {Error} Synchronously, if `destroy()` has already been called.
     */
    createNode(context, options) {
        if (this.internal.isDestroyed) {
            throw new Error('Unable to create node because VoiceFocus worker has been destroyed.');
        }
        const { voiceFocusSampleRate = (context.sampleRate === 16000 ? 16000 : 48000), enabled = true, agc = DEFAULT_AGC_SETTING, } = options || {};
        const supportFarendStream = options != null ? options.es : undefined;
        const processorOptions = {
            voiceFocusSampleRate,
            enabled,
            sendBufferCount: 10,
            prefill: 6,
            agc,
            executionQuanta: this.executionQuanta,
            supportFarendStream,
        };
        const fetchBehavior = this.nodeOptions != null ? this.nodeOptions.fetchBehavior : undefined;
        const url = fetch_js_1.withQueryString(this.processorURL, fetchBehavior);
        return context.audioWorklet
            .addModule(url)
            .then(() => new (this.nodeConstructor)(context, Object.assign({}, this.nodeOptions, { processorOptions })));
    }

    /**
     * Route a media stream through a new Voice Focus node.
     *
     * @param {MediaStream} stream - Input stream.
     * @param {AudioContext} context - Audio context used to build the graph.
     * @param {object} [options] - Forwarded to `createNode`.
     * @returns {Promise<{node: object, source: object, destination: object, stream: MediaStream}>}
     *   The created graph pieces plus the destination's output stream.
     *   Rejects with an `Error` if `destroy()` has already been called.
     */
    applyToStream(stream, context, options) {
        return __awaiter(this, void 0, void 0, function* () {
            if (this.internal.isDestroyed) {
                throw new Error("Unable to apply stream because VoiceFocus worker has been destroyed");
            }
            const source = context.createMediaStreamSource(stream);
            const node = yield this.applyToSourceNode(source, context, options);
            const destination = context.createMediaStreamDestination();
            node.connect(destination);
            return {
                node,
                source,
                destination,
                stream: destination.stream,
            };
        });
    }

    /**
     * Create a Voice Focus node and connect the given source node to it.
     *
     * @param {AudioNode} source - Node whose output feeds the new node.
     * @param {BaseAudioContext} context - Audio context that will own the node.
     * @param {object} [options] - Forwarded to `createNode`.
     * @returns {Promise<object>} Resolves to the new node.
     */
    applyToSourceNode(source, context, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const node = yield this.createNode(context, options);
            source.connect(node);
            return node;
        });
    }

    /**
     * Terminate the worker (if any) and mark this factory as destroyed, after
     * which `createNode` and `applyToStream` refuse to run.
     */
    destroy() {
        if (this.internal.worker) {
            this.internal.worker.terminate();
        }
        this.internal.isDestroyed = true;
    }
}
exports.VoiceFocus = VoiceFocus;

/**
 * Create an audio context using `window.AudioContext`, falling back to
 * `window.webkitAudioContext`.
 *
 * @param {object} [contextHint=DEFAULT_CONTEXT_HINT] - Constructor options.
 * @returns {AudioContext}
 */
const createAudioContext = (contextHint = DEFAULT_CONTEXT_HINT) => {
    return new (window.AudioContext || window.webkitAudioContext)(contextHint);
};
exports.createAudioContext = createAudioContext;

/**
 * Acquire a microphone stream and, where supported, run it through Voice Focus.
 *
 * When `configure` reports Voice Focus as unsupported, this falls back to a
 * plain `getUserMedia` call with the caller's constraints.
 *
 * @param {AudioContext} context - Audio context used to build the graph.
 * @param {object} [inputOptions] - May carry `constraints`, `spec`,
 *   `delegate`, `preload` (default `true`) and `options` (node options).
 * @param {object} [voiceFocusOptions] - Forwarded to `VoiceFocus.configure`;
 *   its `logger`, if any, is also used here.
 * @returns {Promise<MediaStream>} The processed stream, or the raw stream in
 *   the unsupported fallback.
 */
const getAudioInput = (context, inputOptions, voiceFocusOptions) => __awaiter(void 0, void 0, void 0, function* () {
    // Tolerate missing option objects, as `VoiceFocus.configure` does.
    const { constraints, spec, delegate, preload = true, options } = inputOptions || {};
    const { logger } = voiceFocusOptions || {};
    const config = yield VoiceFocus.configure(spec, voiceFocusOptions);
    if (!config.supported) {
        if (logger != null) {
            logger.warn('Voice Focus not supported; returning standard stream.');
        }
        return window.navigator.mediaDevices.getUserMedia(constraints);
    }
    const factory = yield VoiceFocus.init(config, { delegate, preload, logger });
    const agc = (options && options.agc) || DEFAULT_AGC_SETTING;
    const input = yield window.navigator.mediaDevices.getUserMedia(mungeConstraints(constraints, agc));
    try {
        const result = yield factory.applyToStream(input, context, options);
        return result.stream;
    }
    catch (e) {
        // The caller never sees the raw input stream, so it cannot release the
        // device itself: stop the tracks here before reporting the failure.
        input.getTracks().forEach((track) => track.stop());
        throw e;
    }
});
exports.getAudioInput = getAudioInput;