def extract_transcripts()

in infra-as-code/modules/ingest-pipeline/cf-transcript-correction/lib.py [0:0]


  def extract_transcripts(self, stt_transcript):
    """Reduce an STT transcription to the text of each of its results.

    Walks ``stt_transcript['results']`` in order and, for every result,
    keeps only the transcript of its first alternative, its channel tag
    and its position in the results list. All other fields of a result
    (offsets included) are left out.

    Args:
        stt_transcript (dict): STT transcription downloaded from GCS. It is
            indexed as ``['results'][i]['alternatives'][0]['transcript']``
            and ``['results'][i]['channelTag']``.

    Returns:
        list[dict]: One dictionary per result, in the original order, with
            the keys "index", "transcript" and "channelTag".

    Raises:
        KeyError: If 'results', 'alternatives', 'transcript' or
            'channelTag' is missing where it is looked up.
        IndexError: If a result has an empty 'alternatives' list.
    """
    return [
        {
            "index": position,
            # Only the first alternative of each result is used.
            "transcript": entry["alternatives"][0]["transcript"],
            "channelTag": entry["channelTag"],
        }
        for position, entry in enumerate(stt_transcript['results'])
    ]