in infra-as-code/modules/ingest-pipeline/cf-audio-redaction/audio_redaction.py [0:0]
def redact_audio(self, event, redacted_audios_bucket_name):
    """
    Redacts the audio file referenced by the event and re-uploads its transcript.

    Downloads the transcript JSON (``self.transcript_file_name`` from
    ``self.bucket_name``) and the original audio named by the event. For every
    entry in the transcript's ``results`` it runs ``redact_text`` on the
    entry's alternatives, derives redaction intervals from the first
    alternative and applies them to the downloaded audio through
    ``redact_audio_file``. The audio is then uploaded to
    ``redacted_audios_bucket_name`` under its original file name, and the
    transcript JSON is written to a temporary file and uploaded back to
    ``self.bucket_name`` under ``self.transcript_file_name``.

    Any exception raised along the way is caught and printed, so this method
    always returns ``None`` and never propagates an error to the caller.

    Args:
        event: A dictionary containing information about the audio file.
            The keys ``"event_bucket"`` and ``"event_filename"`` are read.
        redacted_audios_bucket_name: Name of the bucket that receives the
            redacted audio file.
    """
    import os  # local import: only needed for the temp-file cleanup below

    try:
        json_file = self.download_from_gcs(self.bucket_name, self.transcript_file_name)
        src_bucket_name = event.get("event_bucket")
        audio_file_name = event.get("event_filename")

        print("1) Download original audio from GCS")
        tmp_audio_file = self.download_audio_from_gcs(src_bucket_name, audio_file_name)

        print("2) Call DLP and redact audio file using FFMPEG")
        for result in json_file['results']:
            print("DLP: add findings to transcript")
            # NOTE(review): presumably redact_text also updates the alternatives
            # in place, since the transcript is re-uploaded as "modified" below
            # -- confirm against the helper.
            redacted_transcript = self.redact_text(result['alternatives'])
            print(redacted_transcript)

            print("Extract intervals for redaction")
            redaction_intervals = self.get_redaction_intervals(result['alternatives'][0])
            print(redaction_intervals)

            self.redact_audio_file(redaction_intervals, tmp_audio_file)

        print("3) Upload redacted audio to corresponding bucket in GCS")
        # The upload reads the audio from /tmp/<name>; assumes the download and
        # redaction helpers work on that same path -- TODO confirm.
        self.upload_file_to_gcs(redacted_audios_bucket_name, f"/tmp/{tmp_audio_file}", audio_file_name)

        print(json_file)
        tmp_json_file_name = None
        try:
            # delete=False so the file can be re-opened by name for the upload
            # after the handle is closed; it is removed explicitly afterwards.
            with tempfile.NamedTemporaryFile(mode="w", delete=False) as tmp_json_file:
                tmp_json_file_name = tmp_json_file.name
                json.dump(json_file, tmp_json_file, indent=4)

            print("4) Upload modified JSON file to GCS")
            self.upload_file_to_gcs(self.bucket_name, tmp_json_file_name, self.transcript_file_name)
        finally:
            # Remove the temp file whether or not the dump/upload succeeded, so
            # repeated invocations do not accumulate files in the temp dir.
            if tmp_json_file_name is not None:
                try:
                    os.remove(tmp_json_file_name)
                except FileNotFoundError:
                    # Already gone (e.g. removed by the upload helper).
                    pass
    except Exception as e:
        print(f"An error occurred: {e}")