in src/diarizers/data/speaker_diarization.py [0:0]
def process_cha_file(self, path_to_cha):
    """Extract speaker turns and their timings from a ``.cha`` transcript.

    Only lines starting with ``*`` are considered. For such a line, the
    speaker label is the text between the leading ``*`` and the first ``:``.
    The last space-separated token is expected to hold the timing in the form
    ``<c>START_END<c>``, where ``<c>`` is a single wrapping character that is
    dropped on each side (presumably the CHAT bullet delimiter -- confirm
    against the corpus). Lines without such a token, or whose START/END are
    rejected by ``represent_int``, are skipped.

    Args:
        path_to_cha: Path of the transcript file to read.

    Returns:
        A tuple ``(timestamps_start, timestamps_end, speakers)`` of three
        parallel lists with one entry per accepted line. The start/end values
        are whatever ``get_start_end`` returns for the parsed integers.
    """
    timestamps_start = []
    timestamps_end = []
    speakers = []

    # Use a context manager so the handle is closed deterministically instead
    # of being left to the garbage collector. The encoding is explicit so the
    # result does not depend on the platform locale (assumes the transcript
    # is UTF-8, as CHAT files are expected to be -- confirm for this corpus).
    with open(path_to_cha, "r", encoding="utf-8") as cha_file:
        lines = cha_file.read().splitlines()

    for line in lines:
        if not line.startswith("*"):
            continue

        # Text between the leading "*" and the first ":".
        speaker_id = line.split(":")[0][1:]

        # The timing, when present, is the last space-separated token.
        timing_token = line.split(" ")[-1]
        if "_" not in timing_token:
            continue

        fields = timing_token.strip().split("_")
        # Drop the single wrapping character before START and after END.
        start = fields[0].strip()[1:]
        end = fields[1].strip()[:-1]

        if represent_int(start) and represent_int(end):
            start, end = get_start_end(int(start), int(end))
            speakers.append(speaker_id)
            timestamps_start.append(start)
            timestamps_end.append(end)

    return timestamps_start, timestamps_end, speakers