def crop_audio()

in src/diarizers/data/speaker_diarization.py


    def crop_audio(self, files):
        # Output batch: cropped audio plus annotations shifted to the new origin
        new_batch = {
            "audio": [],
            "timestamps_start": [],
            "timestamps_end": [],
            "speakers": [],
        }

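        # Reshape the columnar batch (dict of lists) into one dict per file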
        batch = [{key: values[i] for key, values in files.items()} for i in range(len(files["audio"]))]

        for file in batch:
            # Crop the audio to the annotated span (timestamps in seconds -> sample indices)

            # We add a file only if it's annotated:
            if len(file["timestamps_start"]) != 0:
                start_idx = int(file["timestamps_start"][0] * self.sample_rate)
                end_idx = int(max(file["timestamps_end"]) * self.sample_rate)

                waveform = file["audio"]["array"]

                audio = {
                    "array": np.array(waveform[start_idx:end_idx]),
                    "sampling_rate": self.sample_rate,
                }

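                # Shift the annotations so they are relative to the cropped audio's new origin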
                timestamps_start = [start - file["timestamps_start"][0] for start in file["timestamps_start"]]
                timestamps_end = [end - file["timestamps_start"][0] for end in file["timestamps_end"]]

                new_batch["audio"].append(audio)
                new_batch["timestamps_start"].append(timestamps_start)
                new_batch["timestamps_end"].append(timestamps_end)
                new_batch["speakers"].append(file["speakers"])

        return new_batch
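
For orientation, crop_audio follows the batched datasets.map contract: it receives a dict of column lists and may return fewer rows than it was given, since unannotated files are dropped. Below is a minimal, self-contained sketch of that wiring; the _Cropper stand-in class, its constructor, and the toy data are assumptions made for this illustration, and only the column names and the crop/shift behaviour mirror the method above.

    import numpy as np
    from datasets import Dataset


    class _Cropper:
        """Stand-in for the preprocessor class that defines crop_audio above."""

        def __init__(self, sample_rate=16000):
            self.sample_rate = sample_rate

        def crop_audio(self, files):
            # Same contract as the method above, trimmed to the essentials:
            # dict of columns in, dict of columns out, unannotated rows dropped.
            new_batch = {"audio": [], "timestamps_start": [], "timestamps_end": [], "speakers": []}
            rows = [{key: values[i] for key, values in files.items()} for i in range(len(files["audio"]))]
            for row in rows:
                if row["timestamps_start"]:
                    start = int(row["timestamps_start"][0] * self.sample_rate)
                    end = int(max(row["timestamps_end"]) * self.sample_rate)
                    new_batch["audio"].append(
                        {"array": np.array(row["audio"]["array"][start:end]), "sampling_rate": self.sample_rate}
                    )
                    origin = row["timestamps_start"][0]
                    new_batch["timestamps_start"].append([t - origin for t in row["timestamps_start"]])
                    new_batch["timestamps_end"].append([t - origin for t in row["timestamps_end"]])
                    new_batch["speakers"].append(row["speakers"])
            return new_batch


    # Two toy files; the second has no annotations and is dropped by the crop.
    ds = Dataset.from_dict({
        "audio": [
            {"array": [0.0] * (5 * 16000), "sampling_rate": 16000},
            {"array": [0.0] * (5 * 16000), "sampling_rate": 16000},
        ],
        "timestamps_start": [[1.0, 2.5], []],
        "timestamps_end": [[2.0, 4.0], []],
        "speakers": [["spk_0", "spk_1"], []],
    })

    cropped = ds.map(
        _Cropper().crop_audio,
        batched=True,                    # crop_audio expects a dict of column lists
        remove_columns=ds.column_names,  # the row count changes, so drop the old columns
    )
    print(len(cropped))                    # 1
    print(cropped[0]["timestamps_start"])  # [0.0, 1.5] -> shifted to start at zero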