recipes/sota/2019/lm_analysis/shuffle_segments.py (83 lines of code) (raw):
import os
import random
import shutil
import sys
import tempfile
from multiprocessing import Pool

import sox
# Number of worker processes; also the number of shards the input is split into.
N_THREADS = 40
# A silence ("$") token must be longer than this to be used as a cut point
# (same unit as the alignment times -- presumably seconds, TODO confirm).
MIN_SIL_LENGTH = 0.13
# Each cut is placed this far before the end of the qualifying silence.
TOLERANCE = 0.04

# Positional command-line arguments: the alignment file to read and the
# location where the shuffled audio and list files are written.
align_file, output_dir = sys.argv[1], sys.argv[2]

# The whole alignment file is loaded at import time so that every worker
# process can index into `lines` directly.
with open(align_file) as fin:
    lines = list(fin)
def _split_on_silences(alignments, min_sil_length, tolerance):
    """Group aligned words into chunks separated by long silences.

    Each entry of `alignments` is a whitespace-separated record whose fields
    at index 2, 3 and 4 are the start time, the length and the token. The
    token "$" denotes silence. The very first record is never treated as a
    word or as a cut point; it only contributes to the running end time.

    A silence strictly longer than `min_sil_length` closes the current chunk
    at `tolerance` before the end of that silence, and the next chunk starts
    at the same instant, so chunks are contiguous from time 0.

    Returns:
        (chunk_starts, chunk_ends, words): three lists of equal length, where
        words[k] is the space-joined transcript of chunk k. All three are
        empty when the utterance contains no word chunk at all.
    """
    chunk_starts = [0]
    chunk_ends = []
    words = []
    cur_words = []
    cur_end = 0
    for idx, alignment in enumerate(alignments):
        fields = alignment.split()
        begin = float(fields[2])
        length = float(fields[3])
        word = fields[4]
        cur_end = begin + length
        if idx == 0:
            continue
        if word == "$":
            if length > min_sil_length:
                cut = cur_end - tolerance
                chunk_ends.append(cut)
                chunk_starts.append(cut)
                words.append(" ".join(cur_words))
                cur_words = []
            # Silence tokens never become part of the transcript.
            continue
        cur_words.append(word)

    if cur_words:
        chunk_ends.append(cur_end)
        words.append(" ".join(cur_words))
    else:
        # The last opened chunk has no words: drop its dangling start.
        chunk_starts.pop()
    return chunk_starts, chunk_ends, words


def _write_shuffled_audio(
    filename, new_audio_path, chunk_starts, chunk_ends, order, tmp_dir
):
    """Write the chunks of `filename`, arranged in `order`, to `new_audio_path`.

    A single-chunk utterance is copied unchanged. Otherwise every chunk is
    trimmed into a 16-bit, 16 kHz FLAC file inside `tmp_dir` and the pieces
    are concatenated with sox.
    """
    if len(order) == 1:
        # Nothing to shuffle. shutil.copy avoids spawning a shell, so paths
        # with spaces or shell metacharacters are handled correctly.
        shutil.copy(filename, new_audio_path)
        return

    paths = []
    for k in order:
        sox_tfm = sox.Transformer()
        sox_tfm.set_output_format(
            file_type="flac", encoding="signed-integer", bits=16, rate=16000
        )
        sox_tfm.trim(chunk_starts[k], chunk_ends[k])
        new_path = os.path.join(tmp_dir, "{}.flac".format(k))
        sox_tfm.build(filename, new_path)
        paths.append(new_path)

    # Combine them
    sox.Combiner().build(paths, new_audio_path, "concatenate")


def process(parameters):
    """Shuffle the silence-delimited segments of one shard of utterances.

    Args:
        parameters: a `(tid, n_samples)` pair. The worker handles the entries
            of the module-level `lines` with indices in
            `[tid * n_samples, min(len(lines), (tid + 1) * n_samples))`.

    Each input line is `<audio path>\\t<alignments>`, where the alignment
    records are separated by the literal two-character sequence backslash-n.
    For every utterance the chunks are shuffled with `random.shuffle`, the
    re-ordered audio is written into `output_dir` under the original file
    name, and one tab-separated row is appended to
    `dev-other.<tid>.lst` in `output_dir`: the new audio path (twice), the
    end time of the last chunk multiplied by 1000, and the re-ordered
    transcript.

    Utterances that contain no word chunk are skipped with a warning on
    stderr instead of crashing the worker.
    """
    tid, n_samples = parameters
    # os.path.join keeps the paths correct whether or not `output_dir` ends
    # with a path separator.
    output_list = os.path.join(output_dir, "dev-other.{}.lst".format(tid))
    first = tid * n_samples
    last = min(len(lines), n_samples * (tid + 1))

    # Intermediate chunk files live in a private directory that is removed on
    # exit, so concurrent runs cannot clobber each other and nothing leaks.
    with open(output_list, "w") as fout, tempfile.TemporaryDirectory() as tmp_dir:
        for line_idx in range(first, last):
            fields = lines[line_idx].split("\t")
            filename = fields[0]
            alignments = fields[1].strip().split("\\n")

            chunk_starts, chunk_ends, words = _split_on_silences(
                alignments, MIN_SIL_LENGTH, TOLERANCE
            )
            if not chunk_ends:
                print(
                    "Skipping {}: no word segments found".format(filename),
                    file=sys.stderr,
                )
                continue

            order = list(range(len(chunk_starts)))
            random.shuffle(order)

            new_target = " ".join(words[k] for k in order)
            new_audio_path = os.path.join(output_dir, os.path.basename(filename))
            fout.write(
                "{}\t{}\t{}\t{}\n".format(
                    new_audio_path, new_audio_path, chunk_ends[-1] * 1000, new_target
                )
            )

            _write_shuffled_audio(
                filename, new_audio_path, chunk_starts, chunk_ends, order, tmp_dir
            )
if __name__ == "__main__":
    # One extra sample per shard guarantees that N_THREADS shards cover every
    # line even when len(lines) is not a multiple of N_THREADS.
    n_sample_per_thread = 1 + len(lines) // N_THREADS
    banner = "Spreading {} threads with {} samples in each".format(
        N_THREADS, n_sample_per_thread
    )
    print(banner)

    # One (shard id, shard size) job per worker process.
    jobs = [(tid, n_sample_per_thread) for tid in range(N_THREADS)]

    pool = Pool(N_THREADS)
    pool.map(process, jobs)
    # Stop accepting work, then wait for the workers to exit.
    pool.close()
    pool.join()