in sanity_checks/check_preprocessing.py [0:0]
def get_chunk_from_pyannote(pyannote_task, file_id, start_time, duration):
    """Extract one chunk of an audio file through a pyannote task object.

    The task's ``prepare_data()`` and ``setup()`` methods are invoked on
    every call, in that order, before the chunk is requested.

    Args:
        pyannote_task (pyannote.audio.tasks.segmentation.speaker_diarization.SpeakerDiarization):
            pyannote SpeakerDiarization task object, with
            AMI__SpeakerDiarization__only_words as protocol.
        file_id (int): ID of the AMI dataset file.
        start_time (float): chunk start time.
        duration (float): chunk duration.

    Returns:
        dict: whatever ``pyannote_task.prepare_chunk`` returns for the
        requested chunk, containing:
            'X': waveform tensor
            'y': pyannote SlidingWindowFeature with the target
            'meta': dict with metadata.
    """
    # The task is initialised here, before any chunk is requested:
    # data preparation first, then setup.
    pyannote_task.prepare_data()
    pyannote_task.setup()

    # Arguments are forwarded positionally, exactly as received.
    return pyannote_task.prepare_chunk(file_id, start_time, duration)