def to_wav2letter_format()

in voxpopuli/segmentation/__init__.py [0:0]


def to_wav2letter_format(data: torch.Tensor, sr: int) -> torch.Tensor:
    r"""
    Convert a waveform to the mono 16kHz layout that wav2letter needs.

    Args:
        data: waveform tensor. A 2-D tensor is averaged over dim 0 (keeping
            that dim, so the result has a single row); a 1-D tensor is
            viewed as a single row.
        sr: sampling rate of ``data``.

    Returns:
        A 2-D tensor whose first dimension has size 1. When ``sr`` differs
        from 16000 the signal is resampled to 16000 and then clamped to
        [-1.0, 1.0]; when ``sr`` is already 16000 the values are returned
        without clamping.

    Raises:
        ValueError: if ``data`` is neither 1-D nor 2-D.
    """
    target_sr = 16000

    n_dims = data.dim()
    if n_dims == 2:
        # Down-mix: average along dim 0 while keeping it as a size-1 dim.
        data = data.mean(dim=0, keepdim=True)
    elif n_dims == 1:
        data = data.view(1, -1)
    else:
        raise ValueError("Invalid tensor format")

    if sr != target_sr:
        resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
        # Clamping is applied only on the resampled path -- presumably because
        # the resampling filter can overshoot the [-1, 1] range (TODO confirm).
        data = torch.clamp(resampler(data), min=-1.0, max=1.0)
    return data