def order_transcript()

in infra-as-code/modules/ingest-pipeline/cf-stt-transcript/lib.py [0:0]


  def order_transcript(self, bucket_name, filename):
    """Sorts a transcript's results by end offset and rewrites it in GCS.

    Downloads the JSON blob, drops every result that has no "alternatives"
    key (empty turns), sorts the remaining results by "resultEndOffset" so
    the turns are in chronological order, and uploads the modified JSON
    back to the same blob.

    A transcript with a missing or null "results" entry is treated as
    having no results (an empty "results" list is written), and a result
    without "resultEndOffset" is ordered as offset 0, instead of raising
    KeyError.

    Args:
        bucket_name (str): Bucket name
        filename (str): Blob name

    Raises:
        ValueError: If the blob content is not valid JSON, or if a
            "resultEndOffset" value is not a number optionally followed
            by "s".
    """
    bucket = self.storage_client.bucket(bucket_name)
    blob = bucket.blob(filename)

    transcript_data = json.loads(blob.download_as_text())

    def end_offset_seconds(result):
      # Offsets are strings such as "12.340s". Only the trailing unit is
      # stripped; an absent offset is ordered as 0 seconds.
      return float(result.get("resultEndOffset", "0s").rstrip("s"))

    # "results" may be absent (or null) when nothing was recognized.
    results = transcript_data.get("results") or []

    # sorted() is stable, so results sharing an offset keep their order.
    transcript_data["results"] = sorted(
        (item for item in results if "alternatives" in item),
        key=end_offset_seconds)

    modified_contents = json.dumps(transcript_data, indent=2)
    blob.upload_from_string(modified_contents, content_type='application/json')
    print(f"Modified JSON file '{filename}' successfully updated in bucket '{bucket_name}'.")