export default async function playCognitiveServicesStream()

in packages/directlinespeech/src/playCognitiveServicesStream.js [103:207]


export default async function playCognitiveServicesStream(audioContext, stream, { signal = {} } = {}) {
  if (!audioContext) {
    throw new Error('botframework-directlinespeech-sdk: audioContext must be specified.');
  } else if (!stream) {
    throw new Error('botframework-directlinespeech-sdk: stream must be specified.');
  } else if (!stream.format) {
    throw new Error('botframework-directlinespeech-sdk: stream is missing format.');
  } else if (typeof stream.read !== 'function') {
    throw new Error('botframework-directlinespeech-sdk: stream is missing read().');
  }

  const queuedBufferSourceNodes = [];

  try {
    const { format } = stream;
    const abortPromise = abortToReject(signal);
    const array = new Uint8Array(DEFAULT_BUFFER_SIZE);

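    // read() resolves with the next chunk as a Uint8Array (a shorter copy for a partial final chunk),
    // or with undefined when the stream ends or the signal is aborted.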
    const read = () =>
      Promise.race([
        abortPromise.catch(() => {
          // Abort will gracefully end the queue. We will check signal.aborted later to throw the abort exception.
        }),
        stream
          .read(array.buffer)
          .then(numBytes => (numBytes === array.byteLength ? array : numBytes ? array.slice(0, numBytes) : undefined))
      ]);

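    // Bail out before reading anything if the caller has already aborted.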
    if (signal.aborted) {
      throw new Error('aborted');
    }

    let { samplesPerSec } = format;

    // TODO: [P0] #3692 Remove the following if-condition block when the underlying bugs are resolved.
    //       There is a bug in Speech SDK 1.15.0 that returns 24kHz instead of 16kHz.
    //       Even if we explicitly specify the output audio format as 16kHz, there is another bug that ignores it.
    //       In short, DLSpeech service currently always streams in RIFF WAV format, instead of MP3.
    //       https://github.com/microsoft/cognitive-services-speech-sdk-js/issues/313
    //       https://github.com/microsoft/cognitive-services-speech-sdk-js/issues/314
    if (format.requestAudioFormatString === 'audio-24khz-48kbitrate-mono-mp3') {
      samplesPerSec = 16000;
    }

    let newSamplesPerSec = samplesPerSec;
    let sampleRateMultiplier = 1;

    // Safari requires a minimum sample rate of 22100 Hz.
    // A multiplier is calculated so the data meets the minimum sample rate.
    // An integer-based multiplier is used to simplify our upsampler.
    // For security, data will only be upsampled up to 96000 Hz.
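    // For example, a 16000 Hz stream gets a 2x multiplier and is played back at 32000 Hz.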
    while (newSamplesPerSec < MIN_SAMPLE_RATE && newSamplesPerSec < 96000) {
      sampleRateMultiplier++;
      newSamplesPerSec = samplesPerSec * sampleRateMultiplier;
    }

    // The third parameter is the buffer size in number of samples.
    // For example, if the Speech SDK sends Web Chat 4096 bytes of 16-bit samples, there will be 2048 samples per channel.
    // The multi-buffering player is set up to handle 2048 samples per buffer.
    // If the multiplier is 3x, it will handle 6144 samples per buffer.
    const player = createMultiBufferingPlayer(
      audioContext,
      { ...format, samplesPerSec: newSamplesPerSec },
      (DEFAULT_BUFFER_SIZE / (format.bitsPerSample / 8)) * sampleRateMultiplier
    );

    // For security, the maximum number of chunks handled will be 1000.
    for (
      let chunk = await read(), maxChunks = 0;
      chunk && maxChunks < 1000 && !signal.aborted;
      chunk = await read(), maxChunks++
    ) {
      if (signal.aborted) {
        break;
      }

      // Data received from the Speech SDK is interleaved: 2 channels (e.g. A and B) will be sent as "ABABABABAB",
      // and each sample (A/B) will be an 8- to 32-bit number.

      // Convert each 8- to 32-bit sample into a floating-point number, as required by the Web Audio API.
      const interleavedArray = formatAudioDataArrayBufferToFloatArray(format, chunk.buffer);

      // Deinterleave the data into one array per channel, e.g. "AAAAA" and "BBBBB".
      const multiChannelArray = deinterleave(interleavedArray, format);

      // Upsample data if necessary. If the multiplier is 2x, "AAAAA" will be upsampled to "AAAAAAAAAA" (with anti-alias).
      const upsampledMultiChannelArray = multiChannelArray.map(array =>
        multiplySampleRate(array, sampleRateMultiplier)
      );

      // Queue to the buffering player.
      player.push(upsampledMultiChannelArray);
    }

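    // If the signal is aborted at any point, cancel all audio buffers still queued in the player.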
    abortPromise.catch(() => player.cancelAll());

    if (signal.aborted) {
      throw new Error('aborted');
    }

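    // Wait until all queued buffers finish playing, or reject early if the abort signal fires first.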
    await Promise.race([abortPromise, player.flush()]);
  } finally {
    queuedBufferSourceNodes.forEach(node => node.stop());
  }
}
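
A minimal usage sketch, not part of the source file. The AudioContext, AbortController and audioOutputStream below are assumptions for illustration: audioOutputStream stands in for any Speech SDK-style audio stream that exposes format and read(buffer), and the { signal } option is assumed to accept a DOM AbortSignal, since the function only relies on signal.aborted and an abort notification.

// Hypothetical caller, for illustration only.
import playCognitiveServicesStream from './playCognitiveServicesStream';

async function playWithAbort(audioOutputStream) {
  const audioContext = new AudioContext();
  const abortController = new AbortController();

  // Aborting stops queuing further chunks and cancels any buffered playback.
  setTimeout(() => abortController.abort(), 5000);

  try {
    await playCognitiveServicesStream(audioContext, audioOutputStream, {
      signal: abortController.signal
    });
  } catch (err) {
    // Rejects with Error('aborted') when the signal is aborted mid-playback.
  }
}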