export async function transcribeAndUpload()

in speech-to-text/functions/src/transcribe-audio.ts [36:126]


/**
 * Submits a long-running recognize request for an audio object in Cloud
 * Storage and maps the response onto a `TranscribeAudioResult`.
 *
 * The request reads audio from `gs://<bucket.name>/<name>` and sets the
 * request's output location to the same path with the first `tmp/` removed
 * and `_transcription.txt` appended.
 *
 * @param client - Speech client handed to `transcribe` together with the request.
 * @param file - Bucket and object name of the audio to transcribe.
 * @param sampleRateHertz - Forwarded unchanged into the recognition config.
 * @param audioChannelCount - Forwarded unchanged into the recognition config.
 * @returns One of:
 *   - `FAILURE` / `TRANSCRIPTION_UPLOAD_FAILED` when the response carries an
 *     `outputError` (details include the output URI and the error);
 *   - `FAILURE` / `NULL_TRANSCRIPTION` when the response has no `results`, or
 *     when `getTranscriptionsByChannel` yields `null`;
 *   - `SUCCESS` with the per-channel transcription otherwise.
 */
export async function transcribeAndUpload({
  client,
  file: {bucket, name},
  sampleRateHertz,
  audioChannelCount,
}: {
  client: SpeechClient;
  file: {bucket: Bucket; name: string};
  sampleRateHertz: number;
  audioChannelCount: number;
}): Promise<TranscribeAudioResult> {
  const bucketUri = `gs://${bucket.name}`;
  // NOTE(review): a string pattern makes `replace` drop only the first `tmp/`,
  // wherever it occurs in the object name — confirm that this is intended.
  const outputObjectName = name.replace('tmp/', '');

  // No code path in this function appends to the list; it is returned as-is
  // with every result.
  const warnings: WarningType[] = [];

  const request: google.cloud.speech.v1.ILongRunningRecognizeRequest = {
    config: {
      encoding,
      enableAutomaticPunctuation: config.enableAutomaticPunctuation,
      sampleRateHertz,
      languageCode: config.languageCode,
      model: config.model,
      audioChannelCount,
    },
    audio: {uri: `${bucketUri}/${name}`},
    outputConfig: {gcsUri: `${bucketUri}/${outputObjectName}_transcription.txt`},
  };

  const response = await transcribe(client, request);

  // An output error is reported before anything else, and before the response
  // is logged.
  const {outputError} = response;
  if (outputError) {
    return {
      status: Status.FAILURE,
      warnings,
      type: FailureType.TRANSCRIPTION_UPLOAD_FAILED,
      details: {
        outputUri: response.outputConfig?.gcsUri,
        outputError,
      },
    };
  }

  logs.receivedLongRunningRecognizeResponse(response);

  // Intermediate, per-channel form of the results. Missing results and a null
  // per-channel mapping are reported as the same failure.
  const {results} = response;
  const transcription: Record<number, string[]> | null =
    results == null ? null : getTranscriptionsByChannel(results);

  if (transcription == null) {
    return {
      status: Status.FAILURE,
      warnings,
      type: FailureType.NULL_TRANSCRIPTION,
    };
  }

  // `transcription` is a simplified view of what the cloud call returns: for
  // instance, we never offer the option of requesting several candidate
  // transcriptions from speech-to-text.
  //
  // That simplification is not applied to the file the Cloud Speech API
  // writes to storage, so the stored file is more complicated than the one we
  // would produce if we handled the upload ourselves. Two reasonable options:
  // (a) Stop simplifying, and keep relying on the Cloud Speech API's upload
  //     capabilities.
  // (b) Stop using the Cloud Speech API's upload capabilities, and upload a
  //     simplified file from the extension itself.

  logs.logResponseTranscription(transcription);
  return {
    status: Status.SUCCESS,
    warnings,
    transcription,
  };
}