in infra-as-code/modules/ingest-pipeline/cf-transcript-correction/lib.py [0:0]
def extract_transcripts(self, stt_transcript):
    """Collect the first-alternative transcript of every STT result.

    Walks the whole transcription, discarding offsets and any other
    per-result fields, and keeps only the transcript text, the channel
    tag and the position of the result in the original ``results`` list.

    Args:
        stt_transcript (dict): STT transcription downloaded from GCS.
            A missing or ``None`` ``"results"`` entry is treated as
            "no results" (presumably what a response with no recognized
            speech looks like -- confirm against the producer).

    Returns:
        list[dict]: One dictionary per result, in input order, with keys
        ``"index"`` (position in ``results``), ``"transcript"`` (text of
        the first alternative) and ``"channelTag"``. Empty when there are
        no results.

    Raises:
        KeyError: If a result lacks ``"alternatives"``, ``"channelTag"``
            or its first alternative lacks ``"transcript"``.
        IndexError: If a result has an empty ``"alternatives"`` list.
    """
    # `or []` covers both an absent key and an explicit null, so an empty
    # transcription yields an empty list instead of raising KeyError.
    results = stt_transcript.get("results") or []
    return [
        {
            "index": position,
            # Only the first alternative is kept; the rest are ignored.
            "transcript": result["alternatives"][0]["transcript"],
            "channelTag": result["channelTag"],
        }
        for position, result in enumerate(results)
    ]