in recipes/data/switchboard/utils.py [0:0]
def process_swbd_data(sample_data):
    """Cut one Switchboard conversation into per-utterance FLAC files.

    For each of the two channels, the SPHERE file is decoded to a temporary
    WAV with ``sph2pipe`` (``-c 1`` for channel A, ``-c 2`` for channel B).
    Every segment listed in that channel's transcript file is then trimmed
    out with sox and written as a 16-bit signed-integer FLAC file named
    ``<index>.flac`` inside ``<swbd_audio_path>/<conversation id>/``.
    Segments whose sanitized transcript is empty are skipped and do not
    consume an index.

    Args:
        sample_data: 5-tuple ``(data, _, swbd_audio_path, sph2pipe,
            acronym_dict)``. ``data`` is ``(id, sphfile, chA, chB)``: the
            conversation id (used as the output sub-directory name), the
            SPHERE audio path, and the transcript paths for channels A and B.
            The second element is ignored. ``sph2pipe`` is the executable to
            invoke and ``acronym_dict`` is forwarded to ``sanitize``.

    Returns:
        A list of tab-separated strings, one per kept segment, holding the
        utterance id, the output FLAC path, ``(end - start) * 1000`` formatted
        with two decimals, and the lower-cased transcript.

    Raises:
        AssertionError: if the decoded channel audio does not have a
            positive duration.
    """
    # Function-scope import so this block brings its own dependency.
    import subprocess

    data, _, swbd_audio_path, sph2pipe, acronym_dict = sample_data
    conv_id, sphfile, chA, chB = data
    # The PID in the name presumably keeps concurrent worker processes from
    # overwriting each other's scratch file -- confirm against the caller.
    tmp_file = os.path.join(swbd_audio_path, "{pid}_tmp.wav".format(pid=os.getpid()))
    cur_audio_path = os.path.join(swbd_audio_path, conv_id)
    os.makedirs(cur_audio_path, exist_ok=True)

    idx = 0
    lines = []
    for sph_channel, transcript_path in ((1, chA), (2, chB)):
        try:
            # Argument list instead of a shell string: paths containing
            # spaces or shell metacharacters are passed through verbatim.
            # The exit status is deliberately not inspected (as before); a
            # failed conversion is caught by the duration check below.
            command = [
                str(sph2pipe),
                "-f",
                "wav",
                "-c",
                str(sph_channel),
                str(sphfile),
                tmp_file,
            ]
            subprocess.run(command)

            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``; the exception type and message are unchanged.
            if not (sox.file_info.duration(tmp_file) > 0):
                raise AssertionError(
                    "Audio file {} duration is zero.".format(sphfile)
                )

            with open(transcript_path, "r") as f:
                for line in f:
                    if not line.strip():
                        # Blank lines carry no segment; skip them rather
                        # than failing on the fixed-column parsing below.
                        continue
                    # Columns 0-5 hold the conversation name, column 6 the
                    # channel letter as written in the transcript itself.
                    name = line[0:6].replace("sw", "sw0")
                    seg_channel = line[6]
                    splits = line.strip().split(" ", 3)
                    start = float(splits[1])
                    end = float(splits[2])
                    transcript = sanitize(splits[3], acronym_dict)
                    if not transcript:
                        continue

                    # Times are encoded as rounded hundredths, zero-padded
                    # to six digits.
                    utt = "{n}-{c}_{s:06d}-{e:06d}".format(
                        n=name,
                        c=seg_channel,
                        s=int(start * 100 + 0.5),
                        e=int(end * 100 + 0.5),
                    )
                    out_file = os.path.join(
                        cur_audio_path, "{:09d}.flac".format(idx)
                    )

                    # A fresh Transformer per segment: effects such as
                    # ``trim`` accumulate on the instance.
                    sox_tfm = sox.Transformer()
                    sox_tfm.set_output_format(
                        file_type="flac", encoding="signed-integer", bits=16
                    )
                    sox_tfm.trim(start, end)
                    sox_tfm.build(tmp_file, out_file)

                    duration = (end - start) * 1000.0
                    idx = idx + 1
                    fields = [
                        utt,
                        out_file,
                        "{0:.2f}".format(duration),
                        transcript.lower(),
                    ]
                    lines.append("\t".join(fields))
        finally:
            # Always drop the scratch WAV, even when a step above failed;
            # it may not exist if the conversion itself did not produce it.
            if os.path.exists(tmp_file):
                os.remove(tmp_file)
    return lines