in src/diarizers/data/speaker_diarization.py [0:0]
def crop_audio(self, files):
    """Crop each audio file to its annotated span and shift its timestamps accordingly.

    `files` is a batched-map style dict mapping column names to lists of values.
    Requires `import numpy as np` at module level and `self.sample_rate` in Hz.
    """
    new_batch = {
        "audio": [],
        "timestamps_start": [],
        "timestamps_end": [],
        "speakers": [],
    }
    # Convert the dict of columns into a list of per-file dicts.
    batch = [{key: values[i] for key, values in files.items()} for i in range(len(files["audio"]))]

    for file in batch:
        # Keep a file only if it carries at least one annotation.
        if len(file["timestamps_start"]) != 0:
            # Crop boundaries in samples: from the first listed start timestamp
            # to the latest end timestamp (timestamps are given in seconds).
            start_idx = int(file["timestamps_start"][0] * self.sample_rate)
            end_idx = int(max(file["timestamps_end"]) * self.sample_rate)

            waveform = file["audio"]["array"]
            audio = {
                "array": np.array(waveform[start_idx:end_idx]),
                "sampling_rate": self.sample_rate,
            }

            # Shift timestamps so the cropped audio starts at t = 0.
            timestamps_start = [start - file["timestamps_start"][0] for start in file["timestamps_start"]]
            timestamps_end = [end - file["timestamps_start"][0] for end in file["timestamps_end"]]

            new_batch["audio"].append(audio)
            new_batch["timestamps_start"].append(timestamps_start)
            new_batch["timestamps_end"].append(timestamps_end)
            new_batch["speakers"].append(file["speakers"])

    return new_batch
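# Usage sketch (not part of the library): a minimal, self-contained illustration
# of the cropping arithmetic above on an invented 3-second file with two speaker
# turns. The 16 kHz sample rate and all values below are assumptions made only
# for this example.
import numpy as np

sample_rate = 16_000
waveform = np.zeros(3 * sample_rate)                 # 3 s of silence as a stand-in signal
timestamps_start = [0.5, 1.0]                        # speaker-turn starts, in seconds
timestamps_end = [0.9, 2.2]                          # speaker-turn ends, in seconds

# Crop from the first annotated start to the latest annotated end, in samples.
start_idx = int(timestamps_start[0] * sample_rate)   # 8_000
end_idx = int(max(timestamps_end) * sample_rate)     # 35_200
cropped = waveform[start_idx:end_idx]                # 27_200 samples == 1.7 s

# Shift timestamps so the cropped audio starts at t = 0.
shifted_start = [s - timestamps_start[0] for s in timestamps_start]  # [0.0, 0.5]
shifted_end = [e - timestamps_start[0] for e in timestamps_end]      # approximately [0.4, 1.7]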