in src/main/java/com/amazonaws/kvstranscribestreaming/KVSTranscribeStreamingLambda.java [229:263]
/**
 * Downloads an audio recording from S3 to local storage and streams it to the Transcribe
 * service, blocking until the transcription stream completes or the wait times out.
 *
 * <p>The recording is fetched from {@code RECORDINGS_BUCKET_NAME} under
 * {@code INPUT_KEY_PREFIX + inputFileName} and written to {@code /tmp/<inputFileName>}.
 * The local file is then opened and published to Transcribe; results are handed to a
 * {@code StreamTranscriptionBehaviorImpl} built with {@code fromCustomerSegmentWriter}
 * and {@code TABLE_CALLER_TRANSCRIPT}.
 *
 * <p>Both the local file stream and the Transcribe client are closed when this method
 * returns, whether it finishes normally, times out, or fails.
 *
 * @param inputFileName name of the audio file, relative to the input key prefix in S3
 * @param languageCode  optional language code forwarded to {@code getRequest}
 * @throws Exception if fetching the audio fails, or if opening the file or streaming fails
 *                   (the latter are logged before being rethrown); a timeout while waiting
 *                   is logged and NOT rethrown
 */
private void startFileToTranscribeStreaming(String inputFileName, Optional<String> languageCode) throws Exception {
    // get the audio from S3
    String audioFilePath = "/tmp/" + inputFileName;
    AudioUtils.fetchAudio(REGION, RECORDINGS_BUCKET_NAME, INPUT_KEY_PREFIX + inputFileName, audioFilePath, getAWSCredentials());

    // Now get the stream to be transcribed (and written out as transcript segments).
    // The file stream is declared as a resource so its descriptor is always released;
    // resources close in reverse declaration order, so the client is closed before the
    // stream it was reading from.
    try (InputStream inputStream = new FileInputStream(audioFilePath);
         TranscribeStreamingRetryClient client = new TranscribeStreamingRetryClient(getTranscribeCredentials(),
                 TRANSCRIBE_ENDPOINT, TRANSCRIBE_REGION, metricsUtil)) {

        logger.info("Calling Transcribe service..");
        CompletableFuture<Void> result = client.startStreamTranscription(
                // since we're definitely working with telephony audio, we know that's 8 kHz
                getRequest(8000, languageCode),
                new FileAudioStreamPublisher(inputStream),
                new StreamTranscriptionBehaviorImpl(fromCustomerSegmentWriter, TABLE_CALLER_TRANSCRIPT),
                "None"
        );

        // Synchronous wait for stream to close, and close client connection
        // Timeout of 890 seconds because the Lambda function can be run for at most 15 mins (~890 secs)
        result.get(890, TimeUnit.SECONDS);
    } catch (TimeoutException e) {
        // Deliberately not rethrown: hitting the time budget is treated as a normal end of processing.
        logger.debug("Timing out Audio file to Transcribe Streaming after 890 sec");
    } catch (Exception e) {
        logger.error("Error during streaming: ", e);
        throw e;
    }
}