in api/run_eval.py [0:0]
def process_sample(sample):
    """Transcribe one dataset sample and gather the values needed to score it.

    Relies on ``use_url``, ``model_name`` and ``transcribe_with_retry`` being
    available from the surrounding scope.

    Two input modes, selected by ``use_url``:

    * URL mode: the reference text is read from ``sample["row"]["text"]`` and
      the duration from ``sample["row"]["audio_length_s"]`` (presumably
      seconds -- confirm against the dataset); no local file is created.
    * File mode: the reference text is read from ``sample["norm_text"]``
      (empty if missing), the audio in ``sample["audio"]`` is written to a
      temporary WAV file that is handed to the transcriber, and the duration
      is computed as number of samples divided by the sampling rate.

    In both modes an empty reference is replaced by a single space.

    Args:
        sample: Mapping describing one dataset item; the keys read depend on
            the mode, as described above.

    Returns:
        A ``(reference, transcription, audio_duration, transcription_time)``
        tuple, or ``None`` if ``transcribe_with_retry`` raised. The last
        element is the wall-clock time spent in the transcription call only.
    """
    if use_url:
        row = sample["row"]
        reference = row["text"].strip() or " "
        audio_duration = row["audio_length_s"]
        start = time.time()
        try:
            transcription = transcribe_with_retry(
                model_name, None, sample, use_url=True
            )
        except Exception as e:
            print(f"Failed to transcribe after retries: {e}")
            return None
        transcription_time = time.time() - start
    else:
        reference = sample.get("norm_text", "").strip() or " "
        audio = sample["audio"]
        # delete=False: the file must outlive this handle so it can be passed
        # to the transcriber by path; removal is handled by the finally below.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
            tmp_path = tmpfile.name
        try:
            # Everything that happens while the temp file exists lives inside
            # this try, so a failure while writing the audio cannot leave the
            # file behind.
            sf.write(
                tmp_path,
                audio["array"],
                audio["sampling_rate"],
                format="WAV",
            )
            audio_duration = len(audio["array"]) / audio["sampling_rate"]
            start = time.time()
            try:
                transcription = transcribe_with_retry(
                    model_name, tmp_path, sample, use_url=False
                )
            except Exception as e:
                # No unlink here: the finally clause is the single cleanup
                # point, which avoids a misleading "does not exist" message
                # after every failed transcription.
                print(f"Failed to transcribe after retries: {e}")
                return None
            # Stop the clock before cleanup so file deletion is not counted
            # as transcription time.
            transcription_time = time.time() - start
        finally:
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)
            else:
                print(f"File {tmp_path} does not exist")
    return reference, transcription, audio_duration, transcription_time