async listen()

in speech-commands/src/browser_fft_recognizer.ts [174:281]

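Starts streaming recognition: spectrogram windows are pulled from the browser's FFT output, run through the model, and `callback` is invoked whenever the top score clears `probabilityThreshold` (subject to the noise/unknown filtering and suppression logic below). Throws if streaming is already in progress.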

  async listen(
      callback: RecognizerCallback,
      config?: StreamingRecognitionConfig): Promise<void> {
    if (this.streaming) {
      throw new Error(
          'Cannot start streaming again when streaming is ongoing.');
    }

    await this.ensureModelLoaded();

    if (config == null) {
      config = {};
    }
    let probabilityThreshold =
        config.probabilityThreshold == null ? 0 : config.probabilityThreshold;
    if (config.includeEmbedding) {
      // Override probability threshold to 0 if includeEmbedding is true.
      probabilityThreshold = 0;
    }
    tf.util.assert(
        probabilityThreshold >= 0 && probabilityThreshold <= 1,
        () => `Invalid probabilityThreshold value: ${probabilityThreshold}`);
    let invokeCallbackOnNoiseAndUnknown =
        config.invokeCallbackOnNoiseAndUnknown == null ?
        false :
        config.invokeCallbackOnNoiseAndUnknown;
    if (config.includeEmbedding) {
      // Override invokeCallbackOnNoiseAndUnknown to true if
      // includeEmbedding is true.
      invokeCallbackOnNoiseAndUnknown = true;
    }

    if (config.suppressionTimeMillis < 0) {
      throw new Error(
          `suppressionTimeMillis is expected to be >= 0, ` +
          `but got ${config.suppressionTimeMillis}`);
    }

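    // overlapFactor controls how much consecutive spectrogram windows
    // overlap; the default of 0.5 means each window shares half of its
    // frames with the previous one.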
    const overlapFactor =
        config.overlapFactor == null ? 0.5 : config.overlapFactor;
    tf.util.assert(
        overlapFactor >= 0 && overlapFactor < 1,
        () => `Expected overlapFactor to be >= 0 and < 1, but got ${
            overlapFactor}`);

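    // Invoked by the feature extractor for each spectrogram window.
    // Returns true if a word was recognized (which triggers suppression
    // in the extractor), false otherwise.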
    const spectrogramCallback: SpectrogramCallback =
        async (x: tf.Tensor, timeData?: tf.Tensor) => {
      const normalizedX = normalize(x);
      let y: tf.Tensor;
      let embedding: tf.Tensor;
      if (config.includeEmbedding) {
        await this.ensureModelWithEmbeddingOutputCreated();
        [y, embedding] =
            this.modelWithEmbeddingOutput.predict(normalizedX) as tf.Tensor[];
      } else {
        y = this.model.predict(normalizedX) as tf.Tensor;
      }

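      // Read out the per-word probability scores and find the
      // top-scoring word.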
      const scores = await y.data() as Float32Array;
      const maxIndexTensor = y.argMax(-1);
      const maxIndex = (await maxIndexTensor.data())[0];
      const maxScore = Math.max(...scores);
      tf.dispose([y, maxIndexTensor, normalizedX]);

      if (maxScore < probabilityThreshold) {
        return false;
      } else {
        let spectrogram: SpectrogramData = undefined;
        if (config.includeSpectrogram) {
          spectrogram = {
            data: await x.data() as Float32Array,
            frameSize: this.nonBatchInputShape[1],
          };
        }

        let wordDetected = true;
        if (!invokeCallbackOnNoiseAndUnknown) {
          // Skip background noise and unknown tokens.
          if (this.words[maxIndex] === BACKGROUND_NOISE_TAG ||
              this.words[maxIndex] === UNKNOWN_TAG) {
            wordDetected = false;
          }
        }
        if (wordDetected) {
          callback({scores, spectrogram, embedding});
        }
        // Trigger suppression only if the word is neither unknown nor
        // background noise.
        return wordDetected;
      }
    };

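    // After a recognition event, further callbacks are suppressed for
    // suppressionTimeMillis to avoid reporting the same utterance twice.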
    const suppressionTimeMillis = config.suppressionTimeMillis == null ?
        this.DEFAULT_SUPPRESSION_TIME_MILLIS :
        config.suppressionTimeMillis;
    this.audioDataExtractor = new BrowserFftFeatureExtractor({
      sampleRateHz: this.parameters.sampleRateHz,
      numFramesPerSpectrogram: this.nonBatchInputShape[0],
      columnTruncateLength: this.nonBatchInputShape[1],
      suppressionTimeMillis,
      spectrogramCallback,
      overlapFactor
    });

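    // Begin capturing audio from the microphone, optionally honoring
    // caller-supplied audio track constraints.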
    await this.audioDataExtractor.start(config.audioTrackConstraints);

    this.streaming = true;
  }
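
A minimal usage sketch of this method through the package's public API. This is an illustrative example, not part of the source file: the `speechCommands.create('BROWSER_FFT')` entry point and the `wordLabels()` accessor come from `@tensorflow-models/speech-commands`, and the config values shown are arbitrary.

  import * as speechCommands from '@tensorflow-models/speech-commands';

  async function run() {
    // Create a recognizer backed by the browser's native FFT
    // (WebAudio) feature extractor and load its model.
    const recognizer = speechCommands.create('BROWSER_FFT');
    await recognizer.ensureModelLoaded();

    // wordLabels() returns the vocabulary, index-aligned with `scores`.
    const words = recognizer.wordLabels();

    await recognizer.listen(
        async result => {
          const scores = result.scores as Float32Array;
          let maxIndex = 0;
          for (let i = 1; i < scores.length; ++i) {
            if (scores[i] > scores[maxIndex]) {
              maxIndex = i;
            }
          }
          console.log(`Recognized: ${words[maxIndex]}`);
        },
        {
          probabilityThreshold: 0.75,  // only report confident matches
          overlapFactor: 0.5,          // default window overlap
          suppressionTimeMillis: 1000  // debounce repeated detections
        });
  }

  run();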