def process_hub5_data()

in recipes/data/switchboard/utils.py [0:0]


def process_hub5_data(sample_data):
    """Cut one Hub5 transcript segment out of its audio and describe it.

    The source ``.sph`` file is converted to a temporary WAV with sph2pipe,
    the ``[start, end]`` span is trimmed out with sox and written as 16-bit
    FLAC, and a tab-separated list entry for the segment is returned.

    Args:
        sample_data: 5-tuple ``(line, idx, hub5_sdir, hub5_audio_path,
            sph2pipe)`` where ``line`` is a transcript line, ``idx`` is the
            integer used to name the output file (``{idx:09d}.flac``),
            ``hub5_sdir`` is the directory holding ``english/<file>.sph``,
            ``hub5_audio_path`` is the output directory, and ``sph2pipe`` is
            the sph2pipe command.

    Returns:
        ``None`` for empty/blank lines, ``;;`` comment lines and lines
        containing ``IGNORE_TIME_SEGMENT_``; otherwise the string
        ``"<utt>\\t<flac path>\\t<duration>\\t<lower-cased transcript>"``,
        where duration is ``(end - start) * 1000`` with two decimals.

    Raises:
        subprocess.CalledProcessError: if sph2pipe exits with a non-zero
            status.
        AssertionError: if the converted audio has zero duration.
    """
    # Function-scope imports keep this block self-contained.
    import shlex
    import subprocess

    if not line_is_present(sample_data[0]):
        return None
    line, idx, hub5_sdir, hub5_audio_path, sph2pipe = sample_data
    # Strip first so whitespace-only lines are skipped instead of failing
    # later with an IndexError on an empty field list.
    line = line.strip()
    if (not line) or line.startswith(";;") or ("IGNORE_TIME_SEGMENT_" in line):
        return None

    # Fields used: [0] file id, [1] channel, [3] start, [4] end, [6:] text.
    parts = line.split()
    transcript = " ".join(parts[6:])
    transcript = transcript.replace("((", "(")
    transcript = transcript.replace("<B_ASIDE>", "")
    transcript = transcript.replace("<A_ASIDE>", "")

    spk = "{}-{}".format(parts[0], parts[1])
    start = float(parts[3])
    end = float(parts[4])
    # Utterance id embeds start/end as zero-padded hundredths (truncated).
    utt = "{}_{:06d}-{:06d}".format(spk, int(start * 100), int(end * 100))

    in_file = os.path.join(hub5_sdir, "english", parts[0] + ".sph")
    out_file = os.path.join(hub5_audio_path, "{:09d}.flac".format(idx))
    # The pid keeps temp files of concurrently running processes apart.
    tmp_file = os.path.join(hub5_audio_path, "{pid}_tmp.wav".format(pid=os.getpid()))
    channel = 1 if parts[1] == "A" else 2

    # Argument list + no shell: paths containing spaces or shell
    # metacharacters are passed through verbatim. `sph2pipe` itself is split
    # so a command given with extra arguments keeps working.
    command = shlex.split(sph2pipe) + [
        "-f",
        "wav",
        "-c",
        str(channel),
        in_file,
        tmp_file,
    ]
    try:
        subprocess.run(command, check=True)
        # Explicit raise (not `assert`) so the check survives `python -O`;
        # AssertionError is kept so callers see the same exception type.
        if not sox.file_info.duration(tmp_file) > 0:
            raise AssertionError(
                "Audio file {} duration is zero.".format(in_file)
            )
        sox_tfm = sox.Transformer()
        sox_tfm.set_output_format(
            file_type="flac", encoding="signed-integer", bits=16
        )
        sox_tfm.trim(start, end)
        sox_tfm.build(tmp_file, out_file)
    finally:
        # Always drop the temp WAV, even when conversion or trimming fails.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)

    duration = (end - start) * 1000.0
    return "\t".join([utt, out_file, "{0:.2f}".format(duration), transcript.lower()])


def line_is_present(line):
    """Return True when *line* is a non-empty value (not None / not "")."""
    return bool(line)