def redact_audio()

in infra-as-code/modules/ingest-pipeline/cf-audio-redaction/audio_redaction.py [0:0]


    def redact_audio(self, event, redacted_audios_bucket_name):
        """
        Redact an audio file and re-upload both the audio and its transcript.

        Steps performed:
          1. Download the transcript JSON (``self.transcript_file_name`` in
             ``self.bucket_name``) and the original audio named by ``event``.
          2. For every entry in the transcript's ``results`` list, run the
             text redaction on its alternatives, derive the redaction
             intervals from the first alternative and apply them to the
             downloaded audio file.
          3. Upload the audio file to ``redacted_audios_bucket_name`` under
             its original object name.
          4. Dump the transcript JSON to a temporary file and upload it back
             to ``self.bucket_name`` under ``self.transcript_file_name``.

        Any exception raised along the way is printed and swallowed, so the
        caller is not notified of failures (behaviour kept for backward
        compatibility).

        Args:
            event: A dictionary containing information about the audio file.
                The keys ``event_bucket`` and ``event_filename`` are read.
            redacted_audios_bucket_name: Name of the bucket that receives the
                redacted audio file.

        Returns:
            None.
        """
        # Local import so this method does not depend on the module's
        # top-level import block, which is not guaranteed to include ``os``.
        import os

        # Remembered outside the ``try`` so the ``finally`` clause can remove
        # the temporary transcript file even when a later step fails.
        tmp_json_file_name = None

        try:
            json_file = self.download_from_gcs(self.bucket_name, self.transcript_file_name)

            src_bucket_name = event.get("event_bucket")
            audio_file_name = event.get("event_filename")

            print("1) Download original audio from GCS")
            tmp_audio_file = self.download_audio_from_gcs(src_bucket_name, audio_file_name)

            print("2) Call DLP and redact audio file using FFMPEG")
            for result in json_file['results']:
                alternatives = result['alternatives']

                print("DLP: add findings to transcript")
                # NOTE(review): the return value is only printed; presumably
                # redact_text also updates ``alternatives`` in place, since
                # the transcript is re-uploaded below -- confirm.
                redacted_transcript = self.redact_text(alternatives)
                print(redacted_transcript)

                print("Extract intervals for redaction")
                # Only the first alternative is used to locate the intervals.
                redaction_intervals = self.get_redaction_intervals(alternatives[0])
                print(redaction_intervals)

                # Applied once per result, always against the same local file.
                self.redact_audio_file(redaction_intervals, tmp_audio_file)

            print("3) Upload redacted audio to corresponding bucket in GCS")
            self.upload_file_to_gcs(redacted_audios_bucket_name, f"/tmp/{tmp_audio_file}", audio_file_name)

            # NOTE(review): this prints the whole transcript structure to the
            # logs; verify it no longer holds sensitive text at this point.
            print(json_file)
            with tempfile.NamedTemporaryFile(mode="w", delete=False) as tmp_json_file:
                # Capture the name before writing so a failing json.dump
                # still leaves us able to clean the file up.
                tmp_json_file_name = tmp_json_file.name
                json.dump(json_file, tmp_json_file, indent=4)

            print("4) Upload modified JSON file to GCS")
            self.upload_file_to_gcs(self.bucket_name, tmp_json_file_name, self.transcript_file_name)

        except Exception as e:
            print(f"An error occurred: {e}")
        finally:
            # delete=False means nothing removes the file automatically;
            # without this every invocation leaves a transcript copy behind.
            if tmp_json_file_name is not None:
                try:
                    os.remove(tmp_json_file_name)
                except OSError:
                    # Best-effort cleanup: the file may already be gone.
                    pass