# do_split() -- defined in data/ami/utils.py [0:0]


def do_split(all_records, spkrs, total_seconds, handles_chosen=None, tolerance=10):
    """
    Greedily pick records, cycling over speakers, until a time budget is met.

    Speakers are visited in rotation. On each visit the speaker's next
    record whose ``fid`` is not in ``handles_chosen`` is taken and its
    ``length`` is added to the running total. A speaker with no records
    left is dropped from the rotation. Selection stops once the running
    total is within ``tolerance`` of ``total_seconds`` or has passed it,
    or when every speaker is exhausted.

    Args:
        all_records: Mapping from speaker to an indexable sequence of
            records; each record must expose ``fid`` and ``length``.
        spkrs: Speakers to draw from (keys of ``all_records``). The
            caller's sequence is not modified.
        total_seconds: Target total duration, in the same unit as
            ``record.length``.
        handles_chosen: Optional container of ``fid`` values to skip
            (e.g. records already assigned to another split). ``None``
            disables the filter.
        tolerance: Selection stops as soon as the running total exceeds
            ``total_seconds - tolerance``.

    Returns:
        Tuple ``(records_filtered, time_taken)``: the selected records in
        selection order and the sum of their lengths.
    """
    time_taken = 0.0
    records_filtered = []
    idx = 0
    # Shallow copy is enough: only the list itself is mutated (exhausted
    # speakers are removed), and the caller's list must stay intact.
    speakers = list(spkrs)
    next_record_idx = {spk: 0 for spk in speakers}

    while speakers:
        speaker = speakers[idx % len(speakers)]
        idx += 1

        records = all_records[speaker]
        pos = next_record_idx[speaker]
        # Skip over records that were already chosen elsewhere.
        if handles_chosen is not None:
            while pos < len(records) and records[pos].fid in handles_chosen:
                pos += 1

        if pos >= len(records):
            # Speaker exhausted. NOTE: idx is not adjusted after the
            # removal, so the next pick is whatever now sits at
            # idx % len(speakers) in the shortened list.
            next_record_idx[speaker] = pos
            speakers.remove(speaker)
            continue

        cur_record = records[pos]
        next_record_idx[speaker] = pos + 1

        records_filtered.append(cur_record)
        time_taken += cur_record.length

        # Stop when close enough to the budget *or* already past it. A
        # symmetric abs() window can be jumped over by one long record,
        # after which selection would run until all records are used up.
        if time_taken > total_seconds - tolerance:
            break

    return records_filtered, time_taken