in src/diarizers/data/preprocess.py [0:0]
def get_chunk(self, file, start_time):
    """Extract one audio chunk and its frame-level speaker targets.

    Args:
        file (dict): dataset row; ``file["audio"][0]`` must provide the
            ``"sampling_rate"`` and ``"array"`` entries of the recording.
        start_time (float): start time (in seconds) of the chunk to extract.

    Returns:
        waveform (array): samples ``[start_frame:end_frame]`` of the audio
            array. The slice is not padded, so it is shorter than a full
            chunk when the array ends before ``end_frame``.
        y (numpy array): ``(self.num_frames_per_chunk, num_labels)`` uint8
            array; ``y[f, i] == 1`` when speaker ``labels[i]`` is marked
            active on output frame ``f``.
        labels (list): unique speakers found in the chunk, in the order
            returned by ``np.unique``; column ``i`` of ``y`` is ``labels[i]``.

    Raises:
        AssertionError: if the file sampling rate differs from
            ``self.sample_rate``.
    """
    audio = file["audio"][0]
    sample_rate = audio["sampling_rate"]
    assert sample_rate == self.sample_rate, (
        f"expected sampling rate {self.sample_rate}, got {sample_rate}"
    )

    end_time = start_time + self.chunk_duration

    # Sample range covered by the chunk.
    start_frame = math.floor(start_time * sample_rate)
    num_frames_waveform = math.floor(self.chunk_duration * sample_rate)
    end_frame = start_frame + num_frames_waveform
    waveform = audio["array"][start_frame:end_frame]

    # Keep only the annotated segments that overlap [start_time, end_time].
    file_labels = self.get_labels_in_file(file)
    file_segments = self.get_segments_in_file(file, file_labels)
    overlaps = (file_segments["start"] < end_time) & (file_segments["end"] > start_time)
    chunk_segments = file_segments[overlaps]

    # Discretize chunk annotations at model output resolution: times are made
    # relative to the chunk start and shifted by half a receptive field
    # before being divided by the frame step.
    step = self.model.receptive_field.step
    half = 0.5 * self.model.receptive_field.duration

    rel_start = np.maximum(chunk_segments["start"], start_time) - start_time - half
    first_idx = np.maximum(0, np.round(rel_start / step)).astype(int)

    rel_end = np.minimum(chunk_segments["end"], end_time) - start_time - half
    # Exclusive stop index (last active frame + 1). It is clamped at 0 because
    # a segment ending just after `start_time` yields a negative value here,
    # and a negative slice stop would be interpreted relative to the END of
    # the array, wrongly marking almost the whole chunk as active.
    stop_idx = np.maximum(0, np.round(rel_end / step).astype(int) + 1)

    # Speakers present in the chunk define the columns of the target array.
    labels = list(np.unique(chunk_segments["labels"]))
    mapping = {label: idx for idx, label in enumerate(labels)}

    # Frame-level targets, one column per speaker in the chunk.
    y = np.zeros((self.num_frames_per_chunk, len(labels)), dtype=np.uint8)
    for first, stop, label in zip(first_idx, stop_idx, chunk_segments["labels"]):
        y[first:stop, mapping[label]] = 1

    return waveform, y, labels