in speech-to-text/functions/src/transcribe-audio.ts [36:126]
/**
 * Submits a long-running recognition request for an audio object in Cloud
 * Storage and asks the Speech API to write its output back to the same bucket.
 *
 * The audio is read from `gs://<bucket>/<name>`. The output URI is built from
 * the same bucket and object name, with the first occurrence of `tmp/` removed
 * from the name and `_transcription.txt` appended.
 *
 * @param options - Call options.
 * @param options.client - Speech client forwarded to `transcribe`.
 * @param options.file - Bucket and object name of the audio to transcribe.
 * @param options.sampleRateHertz - Sample rate placed in the recognition config.
 * @param options.audioChannelCount - Channel count placed in the recognition
 *   config.
 * @returns A `Status.SUCCESS` result carrying the per-channel transcription, or
 *   a `Status.FAILURE` result typed `TRANSCRIPTION_UPLOAD_FAILED` (the response
 *   reported an output error) or `NULL_TRANSCRIPTION` (no results, or no
 *   per-channel transcription could be derived from them).
 */
export async function transcribeAndUpload({
  client,
  file,
  sampleRateHertz,
  audioChannelCount,
}: {
  client: SpeechClient;
  file: {bucket: Bucket; name: string};
  sampleRateHertz: number;
  audioChannelCount: number;
}): Promise<TranscribeAudioResult> {
  // Nothing in this function appends to the list; it is attached to every
  // result so all outcomes share the same shape.
  const warnings: WarningType[] = [];

  const bucketPrefix = `gs://${file.bucket.name}/`;
  const sourceUri = bucketPrefix + file.name;
  // Only the first 'tmp/' found in the object name is removed.
  const strippedName = file.name.replace('tmp/', '');
  const destinationUri = `${bucketPrefix}${strippedName}_transcription.txt`;

  const recognizeRequest: google.cloud.speech.v1.ILongRunningRecognizeRequest =
    {
      config: {
        encoding,
        enableAutomaticPunctuation: config.enableAutomaticPunctuation,
        sampleRateHertz,
        languageCode: config.languageCode,
        model: config.model,
        audioChannelCount,
      },
      audio: {
        uri: sourceUri,
      },
      outputConfig: {
        gcsUri: destinationUri,
      },
    };

  const recognizeResponse = await transcribe(client, recognizeRequest);

  // An output error is reported before anything else, and before the response
  // itself is logged.
  if (recognizeResponse.outputError) {
    return {
      status: Status.FAILURE,
      warnings,
      type: FailureType.TRANSCRIPTION_UPLOAD_FAILED,
      details: {
        outputUri: recognizeResponse.outputConfig?.gcsUri,
        outputError: recognizeResponse.outputError,
      },
    };
  }

  logs.receivedLongRunningRecognizeResponse(recognizeResponse);

  // Intermediate, per-channel form of the results; not yet the final
  // simplification. Missing results and an underivable transcription are
  // reported identically, so both collapse into a single null check.
  const channelTranscripts: Record<number, string[]> | null =
    recognizeResponse.results == null
      ? null
      : getTranscriptionsByChannel(recognizeResponse.results);

  if (channelTranscripts == null) {
    return {
      status: Status.FAILURE,
      warnings,
      type: FailureType.NULL_TRANSCRIPTION,
    };
  }

  // The value returned here is simpler than what the cloud call usually
  // provides; for example, we never expose the option of requesting several
  // candidate transcriptions from Speech-to-Text.
  //
  // The file that the Cloud Speech API uploads to storage does not get that
  // simplification, so it is more complicated than the file we would produce
  // if we handled the upload ourselves. There are a couple of reasonable
  // options here:
  //   (a) Do not simplify, and keep relying on the Cloud Speech API's upload
  //       capabilities.
  //   (b) Stop using the Cloud Speech API's upload capabilities and upload a
  //       simplified file from the extension itself.
  logs.logResponseTranscription(channelTranscripts);

  return {
    status: Status.SUCCESS,
    warnings,
    transcription: channelTranscripts,
  };
}