def split_vad()

in data_preparation/split_librilight/prepare_vads.py [0:0]


def split_vad(silence_probs: List[float], p_silence_threshold: float, len_threshold: int) -> List[Tuple[int, int]]:
    """Split a sequence of per-frame silence probabilities into speech intervals.

    A frame is silence iff its probability is strictly greater than
    `p_silence_threshold`; every other frame (including one whose probability
    equals the threshold) is speech. The returned intervals (a) are separated
    by runs of at least `len_threshold` silence frames -- shorter silence runs
    are absorbed into the surrounding speech -- and (b) are themselves strictly
    longer than `len_threshold` frames.

    A silence run that reaches the end of the sequence always terminates the
    current speech interval, even if it is shorter than `len_threshold`.

    Arguments:
        silence_probs -- list of silence probabilities, one per frame
        p_silence_threshold -- all frames with silence probability above this
            threshold are considered as silence
        len_threshold -- minimal length of silence and non-silence segments

    Returns: list of half-open tuples
        (start_speech_frame, first_silence_frame_after_start or end_of_sequence)
    """
    n = len(silence_probs)
    segments = []

    # Skip leading silence so that `start` points at the first speech frame
    # (or at n when the whole sequence is silence).
    i = 0
    while i < n and silence_probs[i] > p_silence_threshold:
        i += 1
    # Invariant from here on: `start` is where the current speech run begins
    # and i >= start.
    start = i

    while i < n:
        # Scroll through speech frames. The test is the exact negation of the
        # silence predicate, so a frame equal to the threshold is speech here
        # just as it is in every other comparison of this function.
        if silence_probs[i] <= p_silence_threshold:
            i += 1
            continue

        # i is the first frame of a silence run. Look ahead for the first
        # speech frame inside the next `len_threshold` frames (clipped to the
        # end of the sequence).
        window_end = min(i + len_threshold, n)
        j = i + 1
        while j < window_end and silence_probs[j] > p_silence_threshold:
            j += 1

        if j < window_end:
            # Silence run is too short for a split: treat it as part of the
            # speech and resume from the speech frame that interrupted it.
            i = j
            continue

        # Enough silence for a split; keep the speech run only if long enough.
        if i - start > len_threshold:
            segments.append((start, i))

        # Skip the rest of the silence run; the next speech run starts after it.
        while i < n and silence_probs[i] > p_silence_threshold:
            i += 1
        start = i
        # Frame `start` is known to be speech (or past the end), so step over it.
        i += 1

    # Flush the trailing speech run. `start < n` rejects the case where the
    # sequence ended in silence and `start` was parked at n.
    if start < n and i - start > len_threshold:
        segments.append((start, i))

    return segments