async collectExample()

in speech-commands/src/browser_fft_recognizer.ts [667:821]
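
Collects one transfer-learning example for the given word from the
microphone and adds it to the transfer recognizer's dataset. By default the
example spans the base model's input duration; durationSec or
durationMultiplier (mutually exclusive) lengthen it. Supplying
snippetDurationSec together with onSnippet switches to streaming
collection, in which partial spectrograms are delivered as they arrive and
concatenated into the final example.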


  async collectExample(word: string, options?: ExampleCollectionOptions):
      Promise<SpectrogramData> {
    tf.util.assert(
        !this.streaming,
        () => 'Cannot start collection of transfer-learning example because ' +
            'a streaming recognition or transfer-learning example collection ' +
            'is ongoing');
    tf.util.assert(
        word != null && typeof word === 'string' && word.length > 0,
        () => `Must provide a non-empty string when collecting a ` +
            `transfer-learning example`);

    if (options == null) {
      options = {};
    }
    if (options.durationMultiplier != null && options.durationSec != null) {
      throw new Error(
          `durationMultiplier and durationSec are mutually exclusive, ` +
          `but are both specified.`);
    }

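    // Determine how many spectrogram frames to collect: from an explicit
    // duration in seconds, from a multiplier on the model's input duration,
    // or (by default) from the model's input shape.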
    let numFramesPerSpectrogram: number;
    if (options.durationSec != null) {
      tf.util.assert(
          options.durationSec > 0,
          () =>
              `Expected durationSec to be > 0, but got ${options.durationSec}`);
      const frameDurationSec =
          this.parameters.fftSize / this.parameters.sampleRateHz;
      numFramesPerSpectrogram =
          Math.ceil(options.durationSec / frameDurationSec);
    } else if (options.durationMultiplier != null) {
      tf.util.assert(
          options.durationMultiplier >= 1,
          () => `Expected durationMultiplier to be >= 1, ` +
              `but got ${options.durationMultiplier}`);
      numFramesPerSpectrogram =
          Math.round(this.nonBatchInputShape[0] * options.durationMultiplier);
    } else {
      numFramesPerSpectrogram = this.nonBatchInputShape[0];
    }

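    // snippetDurationSec and onSnippet enable streaming collection and must
    // be specified together.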
    if (options.snippetDurationSec != null) {
      tf.util.assert(
          options.snippetDurationSec > 0,
          () => `snippetDurationSec is expected to be > 0, but got ` +
              `${options.snippetDurationSec}`);
      tf.util.assert(
          options.onSnippet != null,
          () => `onSnippet must be provided if snippetDurationSec ` +
              `is provided.`);
    }
    if (options.onSnippet != null) {
      tf.util.assert(
          options.snippetDurationSec != null,
          () => `snippetDurationSec must be provided if onSnippet ` +
              `is provided.`);
    }
    const frameDurationSec =
        this.parameters.fftSize / this.parameters.sampleRateHz;
    const totalDurationSec = frameDurationSec * numFramesPerSpectrogram;

    this.streaming = true;
    return new Promise<SpectrogramData>(resolve => {
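      // In streaming (snippet) mode, each callback advances the window by
      // snippetDurationSec; overlapFactor expresses that step as window
      // overlap, and callbackCountTarget is the number of snippet callbacks
      // needed to cover the full spectrogram duration.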
      const stepFactor = options.snippetDurationSec == null ?
          1 :
          options.snippetDurationSec / totalDurationSec;
      const overlapFactor = 1 - stepFactor;
      const callbackCountTarget = Math.round(1 / stepFactor);
      let callbackCount = 0;
      let lastIndex = -1;
      const spectrogramSnippets: Float32Array[] = [];

      const spectrogramCallback: SpectrogramCallback =
          async (freqData: tf.Tensor, timeData?: tf.Tensor) => {
        // TODO(cais): can we consolidate the logic in the two branches?
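        // One-shot mode: a single callback delivers the full-length
        // spectrogram, which is normalized and stored as the example.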
        if (options.onSnippet == null) {
          const normalizedX = normalize(freqData);
          this.dataset.addExample({
            label: word,
            spectrogram: {
              data: await normalizedX.data() as Float32Array,
              frameSize: this.nonBatchInputShape[1],
            },
            rawAudio: options.includeRawAudio ?
                {
                  data: await timeData.data() as Float32Array,
                  sampleRateHz: this.audioDataExtractor.sampleRateHz
                } :
                undefined
          });
          normalizedX.dispose();
          await this.audioDataExtractor.stop();
          this.streaming = false;
          this.collateTransferWords();
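          // Note: the promise resolves with the raw (un-normalized)
          // frequency data; the normalized copy is what was stored above.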
          resolve({
            data: await freqData.data() as Float32Array,
            frameSize: this.nonBatchInputShape[1],
          });
        } else {
          const data = await freqData.data() as Float32Array;
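          // The rotating buffer keeps the newest frames at its tail, while
          // the not-yet-filled prefix is all zeros. Walk the zero/non-zero
          // boundary backward from its previous position to count how many
          // new values have arrived, then slice that many values off the
          // tail as the latest snippet.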
          if (lastIndex === -1) {
            lastIndex = data.length;
          }
          let i = lastIndex - 1;
          while (i >= 0 && data[i] !== 0) {
            i--;
          }
          const increment = lastIndex - i - 1;
          lastIndex = i + 1;
          const snippetData = data.slice(data.length - increment, data.length);
          spectrogramSnippets.push(snippetData);

          // onSnippet is guaranteed non-null in this branch (checked above).
          options.onSnippet(
              {data: snippetData, frameSize: this.nonBatchInputShape[1]});

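          // Once enough snippets have arrived to cover the requested
          // duration, stop the extractor and assemble the final example
          // from the accumulated snippets.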
          if (callbackCount++ === callbackCountTarget) {
            await this.audioDataExtractor.stop();
            this.streaming = false;
            this.collateTransferWords();

            const normalized = normalizeFloat32Array(
                concatenateFloat32Arrays(spectrogramSnippets));
            const finalSpectrogram: SpectrogramData = {
              data: normalized,
              frameSize: this.nonBatchInputShape[1]
            };
            this.dataset.addExample({
              label: word,
              spectrogram: finalSpectrogram,
              rawAudio: options.includeRawAudio ?
                  {
                    data: await timeData.data() as Float32Array,
                    sampleRateHz: this.audioDataExtractor.sampleRateHz
                  } :
                  undefined
            });
            // TODO(cais): Fix 1-tensor memory leak.
            resolve(finalSpectrogram);
          }
        }
        return false;
      };
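      // overlapFactor (computed above) controls how far consecutive
      // spectrogram windows overlap, and hence how often the callback fires.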
      this.audioDataExtractor = new BrowserFftFeatureExtractor({
        sampleRateHz: this.parameters.sampleRateHz,
        numFramesPerSpectrogram,
        columnTruncateLength: this.nonBatchInputShape[1],
        suppressionTimeMillis: 0,
        spectrogramCallback,
        overlapFactor,
        includeRawAudio: options.includeRawAudio
      });
      this.audioDataExtractor.start(options.audioTrackConstraints);
    });
  }
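
A minimal usage sketch. The create(), ensureModelLoaded(), createTransfer()
and collectExample() calls are the package's public API; the import path is
the npm package name, while the function and variable names
(collectTwoExamples, base, transfer) and the word labels are illustrative:

  import * as speechCommands from '@tensorflow-models/speech-commands';

  async function collectTwoExamples() {
    const base = speechCommands.create('BROWSER_FFT');
    await base.ensureModelLoaded();
    const transfer = base.createTransfer('my-words');  // illustrative name

    // One-shot: one example spanning the base model's input duration.
    const example = await transfer.collectExample('red');
    console.log(`Collected ${example.data.length} values ` +
        `(frameSize = ${example.frameSize})`);

    // Streaming: a 3-second example, with partial spectrograms delivered
    // roughly every 0.5 s (e.g., to drive a progress indicator).
    await transfer.collectExample('green', {
      durationSec: 3,
      snippetDurationSec: 0.5,
      onSnippet: async (snippet) => {
        console.log(`snippet: ${snippet.data.length} values`);
      }
    });
  }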