in src/diarizers/data/speaker_diarization.py [0:0]
def process_rttm_file(self, path_to_annotations):
    """Extract segment start times, end times and speaker labels from an annotations file.

    Each non-blank line is split on whitespace. The 4th field is read as the
    segment start, the 5th field as the segment duration, and the third field
    from the end as the speaker label. Blank or whitespace-only lines are
    skipped.

    Args:
        path_to_annotations (str): path to the annotations file.

    Returns:
        timestamps_start (list): start time of each segment, in file order.
        timestamps_end (list): end time of each segment (start + duration), in file order.
        speakers (list): speaker label of each segment, in file order. The three
            lists are index-aligned, so a speaker appears once per segment.

    Raises:
        IndexError: if a non-blank line has fewer than 5 whitespace-separated fields.
        ValueError: if the start or duration field cannot be parsed as a float.
    """
    timestamps_start = []
    timestamps_end = []
    speakers = []

    # Iterate the handle directly instead of materialising every line first.
    with open(path_to_annotations, "r") as file:
        for line in file:
            fields = line.split()
            # A blank line (commonly a trailing one) yields no fields and would
            # otherwise crash on the indexing below.
            if not fields:
                continue

            # Counted from the end, as in the original implementation; with the
            # usual 10-column RTTM layout this is presumably the speaker-name
            # column -- confirm against the files actually used.
            speaker = fields[-3]
            start_time = float(fields[3])
            # The file stores a duration, not an end time.
            end_time = start_time + float(fields[4])

            timestamps_start.append(start_time)
            timestamps_end.append(end_time)
            speakers.append(speaker)

    return timestamps_start, timestamps_end, speakers