in speech-commands/src/browser_fft_recognizer.ts [667:821]
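// Collect one example of a given word for transfer learning.
//
// `word` is the label to attach to the collected example. `options` controls
// the capture: `durationSec` and `durationMultiplier` (mutually exclusive)
// set how long the spectrogram is, and `snippetDurationSec` together with
// `onSnippet` stream partial spectrograms to the caller while the example is
// being recorded. The returned promise resolves with the collected
// SpectrogramData once recording completes.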
async collectExample(word: string, options?: ExampleCollectionOptions):
Promise<SpectrogramData> {
tf.util.assert(
!this.streaming,
() => 'Cannot start collection of transfer-learning example because ' +
'a streaming recognition or transfer-learning example collection ' +
'is ongoing');
tf.util.assert(
word != null && typeof word === 'string' && word.length > 0,
() => `Must provide a non-empty string when collecting a ` +
`transfer-learning example`);
if (options == null) {
options = {};
}
if (options.durationMultiplier != null && options.durationSec != null) {
throw new Error(
`durationMultiplier and durationSec are mutually exclusive, ` +
`but are both specified.`);
}
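// Resolve the spectrogram length (in frames): it can be given directly via
// durationSec, scaled from the model's input length via durationMultiplier,
// or default to the model's input length (nonBatchInputShape[0]).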
let numFramesPerSpectrogram: number;
if (options.durationSec != null) {
tf.util.assert(
options.durationSec > 0,
() =>
`Expected durationSec to be > 0, but got ${options.durationSec}`);
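// One spectrogram frame spans fftSize audio samples, i.e.
// fftSize / sampleRateHz seconds; round up so the requested duration is
// fully covered.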
const frameDurationSec =
this.parameters.fftSize / this.parameters.sampleRateHz;
numFramesPerSpectrogram =
Math.ceil(options.durationSec / frameDurationSec);
} else if (options.durationMultiplier != null) {
tf.util.assert(
options.durationMultiplier >= 1,
() => `Expected duration multiplier to be >= 1, ` +
`but got ${options.durationMultiplier}`);
numFramesPerSpectrogram =
Math.round(this.nonBatchInputShape[0] * options.durationMultiplier);
} else {
numFramesPerSpectrogram = this.nonBatchInputShape[0];
}
if (options.snippetDurationSec != null) {
tf.util.assert(
options.snippetDurationSec > 0,
() => `snippetDurationSec is expected to be > 0, but got ` +
`${options.snippetDurationSec}`);
tf.util.assert(
options.onSnippet != null,
() => `onSnippet must be provided if snippetDurationSec ` +
`is provided.`);
}
if (options.onSnippet != null) {
tf.util.assert(
options.snippetDurationSec != null,
() => `snippetDurationSec must be provided if onSnippet ` +
`is provided.`);
}
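// Total capture duration implied by the number of frames to collect.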
const frameDurationSec =
this.parameters.fftSize / this.parameters.sampleRateHz;
const totalDurationSec = frameDurationSec * numFramesPerSpectrogram;
this.streaming = true;
return new Promise<SpectrogramData>(resolve => {
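// When snippet streaming is requested, the feature extractor callback fires
// once per snippet: stepFactor is the fraction of the full spectrogram that
// one snippet covers, overlapFactor is its complement, and
// callbackCountTarget is how many snippets make up the full duration.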
const stepFactor = options.snippetDurationSec == null ?
1 :
options.snippetDurationSec / totalDurationSec;
const overlapFactor = 1 - stepFactor;
const callbackCountTarget = Math.round(1 / stepFactor);
let callbackCount = 0;
let lastIndex = -1;
const spectrogramSnippets: Float32Array[] = [];
const spectrogramCallback: SpectrogramCallback =
async (freqData: tf.Tensor, timeData?: tf.Tensor) => {
// TODO(cais): can we consolidate the logic in the two branches?
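// No snippet streaming: wait for one full-length spectrogram, normalize it,
// store it in the transfer dataset, stop recording, and resolve with the
// collected data.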
if (options.onSnippet == null) {
const normalizedX = normalize(freqData);
this.dataset.addExample({
label: word,
spectrogram: {
data: await normalizedX.data() as Float32Array,
frameSize: this.nonBatchInputShape[1],
},
rawAudio: options.includeRawAudio ? {
data: await timeData.data() as Float32Array,
sampleRateHz: this.audioDataExtractor.sampleRateHz
} :
undefined
});
normalizedX.dispose();
await this.audioDataExtractor.stop();
this.streaming = false;
this.collateTransferWords();
resolve({
data: await freqData.data() as Float32Array,
frameSize: this.nonBatchInputShape[1],
});
} else {
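// Snippet streaming: each callback delivers the current (partially filled)
// spectrogram buffer; extract the portion filled since the previous
// callback, forward it to onSnippet, and accumulate it until the full
// duration has been captured.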
const data = await freqData.data() as Float32Array;
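// The buffer is zero-initialized, so scanning backward from the previous
// boundary over non-zero values measures how much new data has arrived
// since the last callback (this assumes real spectrogram values are never
// exactly zero); that many values are then sliced off the tail of the
// buffer as the latest snippet.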
if (lastIndex === -1) {
lastIndex = data.length;
}
let i = lastIndex - 1;
while (i >= 0 && data[i] !== 0) {
i--;
}
const increment = lastIndex - i - 1;
lastIndex = i + 1;
const snippetData = data.slice(data.length - increment, data.length);
spectrogramSnippets.push(snippetData);
if (options.onSnippet != null) {
options.onSnippet(
{data: snippetData, frameSize: this.nonBatchInputShape[1]});
}
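// Once enough snippets have arrived to span the requested duration, stop
// the extractor, concatenate and normalize the snippets into the final
// spectrogram, and add it to the transfer dataset.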
if (callbackCount++ === callbackCountTarget) {
await this.audioDataExtractor.stop();
this.streaming = false;
this.collateTransferWords();
const normalized = normalizeFloat32Array(
concatenateFloat32Arrays(spectrogramSnippets));
const finalSpectrogram: SpectrogramData = {
data: normalized,
frameSize: this.nonBatchInputShape[1]
};
this.dataset.addExample({
label: word,
spectrogram: finalSpectrogram,
rawAudio: options.includeRawAudio ? {
data: await timeData.data() as Float32Array,
sampleRateHz: this.audioDataExtractor.sampleRateHz
} :
undefined
});
// TODO(cais): Fix 1-tensor memory leak.
resolve(finalSpectrogram);
}
}
return false;
};
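// Start capturing audio: suppressionTimeMillis is set to 0 (no callback
// suppression), columnTruncateLength keeps only the frequency bins the
// model expects, and overlapFactor controls how often spectrogramCallback
// fires.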
this.audioDataExtractor = new BrowserFftFeatureExtractor({
sampleRateHz: this.parameters.sampleRateHz,
numFramesPerSpectrogram,
columnTruncateLength: this.nonBatchInputShape[1],
suppressionTimeMillis: 0,
spectrogramCallback,
overlapFactor,
includeRawAudio: options.includeRawAudio
});
this.audioDataExtractor.start(options.audioTrackConstraints);
});
}
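// Illustrative usage sketch (assumes a transfer recognizer `xfer` created
// via createTransfer() on a base recognizer; the variable names are
// hypothetical):
//
//   const example = await xfer.collectExample('red', {durationSec: 2});
//   console.log(example.frameSize, example.data.length);
//
//   // Streaming snippets to a live UI while collecting:
//   await xfer.collectExample('green', {
//     snippetDurationSec: 0.1,
//     onSnippet: async (snippet) => {
//       // e.g., draw snippet.data as it arrives.
//     }
//   });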