in source/lambda/transcribeaudio/transcribeaudio.js [131:185]
/**
 * Launches an Amazon Transcribe job for a video and requests SRT subtitles.
 *
 * The job is named after the video id and writes its output under the
 * "sourcecaptions/" key of the output bucket.
 *
 * @param {Object} params
 * @param {string} params.mediaFileUrl - Sent as Media.MediaFileUri.
 * @param {string} params.videoId - Used as the TranscriptionJobName.
 * @param {string} params.outputBucket - Sent as OutputBucketName.
 * @param {string} [params.transcribeLanguage] - Transcribe language code;
 *   falls back to "zh-CN" when missing or empty.
 * @param {boolean} [params.vocabularyExists] - When truthy, the custom
 *   vocabulary named by params.vocabularyName is attached to the job.
 * @param {string} [params.vocabularyName] - Custom vocabulary name.
 * @returns {Promise<void>} Resolves once startTranscriptionJob() has responded.
 * @throws Rethrows, after logging, any error raised while removing the
 *   previous job or starting the new one.
 */
async function transcribeAudio(params) {
  // Language used when the caller does not supply params.transcribeLanguage.
  const DEFAULT_LANGUAGE_CODE = "zh-CN";

  try {
    /**
     * First try and purge the previous transcribe job
     * Transcribe uses the job id as the output result file
     * name in S3 which is our link to videoId so we must remove
     * prior runs
     */
    await removeTranscribeJob(params);

    if (!params.transcribeLanguage) {
      console.log(
        "[WARN] no transcribeLanguage supplied, defaulting to %s",
        DEFAULT_LANGUAGE_CODE
      );
    }

    const transcribeParams = {
      Media: {
        MediaFileUri: params.mediaFileUrl,
      },
      Subtitles: {
        Formats: ["srt"],
      },
      MediaFormat: "mp4",
      OutputKey: "sourcecaptions/",
      // Previously the default was always overwritten, so a missing language
      // produced a request with no LanguageCode at all.
      LanguageCode: params.transcribeLanguage || DEFAULT_LANGUAGE_CODE,
      TranscriptionJobName: params.videoId,
      OutputBucketName: params.outputBucket,
    };

    if (params.vocabularyExists) {
      console.log("[INFO] found existing vocabulary enabling");
      transcribeParams.Settings = {
        VocabularyName: params.vocabularyName,
      };
    } else {
      console.log(
        "[INFO] no existing vocabulary found, skipping vocabulary use"
      );
    }

    console.log(
      "[INFO] about to launch Transcribe job with params: %j",
      transcribeParams
    );

    const transcribeResult = await transcribe
      .startTranscriptionJob(transcribeParams)
      .promise();

    console.log(
      "[INFO] got startTranscriptionJob() response: %j",
      transcribeResult
    );
  } catch (error) {
    console.log("[ERROR] failed to transcribe audio", error);
    throw error;
  }
}