in packages/worker/src/transcribe.ts [168:230]
text: readFile(textPath),
json: readFile(jsonPath),
};
return { transcripts, metadata };
} catch (error) {
logger.error(
`Could not read the transcript result. Params: ${JSON.stringify(whisperBaseParams)}`,
);
throw error;
}
};
// This function is currently only used in the transcribeAndTranslate path (which at present is only used by giant).
// Giant doesn't have a UI component to provide the language of uploaded files, so we always need to detect the language.
// Resolves the input language code to use for the subsequent transcription/translation runs.
// - whisperX === true: no pre-detection is performed; 'auto' is returned so that whisperx detects the language itself.
// - otherwise: whisper is run once in 'detect language' mode against whisperBaseParams.wavPath, and the detected
//   code is returned if it is one of the known languageCodes. An unrecognised or missing detection yields 'auto'.
// Any error thrown by runWhisper propagates to the caller.
const getLanguageCode = async (
	whisperBaseParams: WhisperBaseParams,
	whisperX: boolean,
): Promise<InputLanguageCode> => {
	// whisperx is so slow to start up let's not even bother pre-detecting the language and just let it run detection
	// for both transcription and translation
	if (whisperX) {
		// an async function already wraps its return value in a promise, so no Promise.resolve is needed
		return 'auto';
	}
	// run whisper.cpp in 'detect language' mode
	const detectLanguageParams = whisperParams(true, whisperBaseParams.wavPath);
	const { metadata } = await runWhisper(
		whisperBaseParams,
		detectLanguageParams,
	);
	// only accept a code we recognise; `find` returns undefined when nothing matches
	const knownCode = languageCodes.find(
		(code) => code === metadata.detectedLanguageCode,
	);
	return knownCode ?? 'auto';
};
// Note: this functionality is only for transcription jobs coming from giant at the moment, though it could be good
// to make it the standard approach for the transcription tool too (rather than what happens currently, where the
// transcription API sends two messages to the worker - one for transcription, another for transcription with
// translation; see generateOutputSignedUrlAndSendMessage in sqs.ts).
const transcribeAndTranslate = async (
whisperBaseParams: WhisperBaseParams,
whisperX: boolean,
): Promise<TranscriptionResult> => {
try {
const languageCode = await getLanguageCode(whisperBaseParams, whisperX);
const transcription = await runTranscription(
whisperBaseParams,
languageCode,
false,
whisperX,
);
// we only run language detection once,
// so need to override the detected language of future whisper runs
transcription.metadata.detectedLanguageCode =
inputToOutputLanguageCode(languageCode);
const translation =
languageCode === 'en'
? null
: await runTranscription(
whisperBaseParams,
languageCode,
true,
whisperX,
);
return {