in data/ami/utils.py [0:0]
def do_split(all_records, spkrs, total_seconds, handles_chosen=None,
             tolerance=10):
    """
    Select records speaker by speaker, round-robin, until a time budget is met.

    Speakers are visited cyclically. On each visit the speaker's next record
    whose ``fid`` is not in ``handles_chosen`` is taken. A speaker with no
    records left is dropped from the rotation. Selection stops as soon as the
    accumulated duration exceeds ``total_seconds - tolerance``, or when every
    speaker has been exhausted.

    Args:
        all_records: Mapping from speaker to a sequence of records; each
            record must expose ``fid`` and ``length`` attributes.
        spkrs: Speakers to draw from, in rotation order. Not mutated.
        total_seconds: Target total duration (same unit as ``record.length``).
        handles_chosen: Optional container of ``fid`` values to skip. When
            ``None`` no record is skipped.
        tolerance: How far below ``total_seconds`` the accumulated duration
            may be for the budget to count as met. Defaults to 10.

    Returns:
        A ``(records, time_taken)`` tuple: the selected records in selection
        order and the sum of their ``length`` values. The final record may
        push ``time_taken`` above ``total_seconds``; the budget is checked
        only after a record has been added.
    """
    time_taken = 0.0
    records_filtered = []
    idx = 0
    # Work on a private list: exhausted speakers are removed below and the
    # caller's sequence must stay intact.
    speakers = list(spkrs)
    # Position of the next not-yet-examined record for each speaker.
    next_record_idx = {spk: 0 for spk in speakers}

    while speakers:
        speaker = speakers[idx % len(speakers)]
        # The rotation counter advances on every turn, including turns where
        # the speaker turns out to have nothing left.
        idx += 1

        records = all_records[speaker]
        cur_record = None
        while next_record_idx[speaker] < len(records):
            candidate = records[next_record_idx[speaker]]
            next_record_idx[speaker] += 1
            if handles_chosen is None or candidate.fid not in handles_chosen:
                cur_record = candidate
                break

        if cur_record is None:
            # Every remaining record was already chosen elsewhere (or there
            # were none): retire this speaker from the rotation.
            speakers.remove(speaker)
            continue

        records_filtered.append(cur_record)
        time_taken += cur_record.length

        # One-sided test: a two-sided ``abs(...) < tolerance`` check can be
        # jumped over by a single long record, after which it would never
        # fire and all remaining records would be consumed.
        if time_taken > total_seconds - tolerance:
            break

    return records_filtered, time_taken