in src/speech_reps/featurize.py [0:0]
def _file_to_feats(self, file):
    """Compute model representations for a single ``.wav`` file.

    The method shells out to the Kaldi command-line tools
    ``compute-fbank-feats``, ``compute-cmvn-stats`` and ``apply-cmvn`` inside
    a temporary directory, reads the normalized feature matrix back with
    ``kaldi_io``, keeps every second row of it, and passes the result
    through ``self._model`` together with an all-``False`` padding mask.

    Args:
        file: Path-like object exposing ``suffix``; the suffix must be
            ``'.wav'``. Its string form is written into the Kaldi input scp.

    Returns:
        The output of ``self._model`` with dimension 0 squeezed, moved to
        the CPU, detached and converted to a NumPy array.

    Raises:
        AssertionError: If ``file.suffix`` is not ``'.wav'``.
        CalledProcessError: If any of the Kaldi commands exits non-zero.
    """
    assert file.suffix == '.wav'

    # To support CMVN files in the future
    cmvn_spec = None

    def _run_cmd(cmd):
        """Log and run a shell command, re-raising on a non-zero exit."""
        # The command strings only contain paths inside our own temporary
        # directory; the input file path goes into the scp file instead.
        logging.warning("Running {}".format(cmd))
        try:
            check_call(cmd, shell=True, universal_newlines=True)
        except CalledProcessError as e:
            logging.error("Failed with code {}:".format(e.returncode))
            # check_call does not capture output, so e.output is normally
            # None; only log it when there is something to show.
            if e.output is not None:
                logging.error(e.output)
            raise

    with TemporaryDirectory() as temp_dir:
        temp_dir = Path(temp_dir)

        # Create config placeholder
        conf_file = temp_dir / 'fbank.conf'
        conf_file.write_text('--num-mel-bins=80\n')

        # Create SCP placeholder: a single entry keyed "file-0"
        input_scp = temp_dir / 'input.scp'
        input_scp.write_text(f'file-0 {file}\n')

        # Compute speech features
        feat_ark = temp_dir / "feat.ark"
        feat_scp = temp_dir / "feat.scp"
        cmd = f"compute-fbank-feats --config={conf_file} scp:{input_scp} ark,scp:{feat_ark},{feat_scp}"
        _run_cmd(cmd)

        cmvn_scp = temp_dir / "cmvn.scp"
        if cmvn_spec is not None:
            # If CMVN specifier is provided, we create a dummy scp that maps
            # our single key to it (f-string so the value is interpolated).
            cmvn_scp.write_text(f"file-0 {cmvn_spec}\n")
        else:
            # Compute CMVN stats
            cmvn_ark = temp_dir / "cmvn.ark"
            cmd = f"compute-cmvn-stats scp:{feat_scp} ark,scp:{cmvn_ark},{cmvn_scp}"
            _run_cmd(cmd)

        # Apply CMVN
        final_ark = temp_dir / "final.ark"
        final_scp = temp_dir / "final.scp"
        cmd = f"apply-cmvn --norm-vars=true scp:{cmvn_scp} scp:{feat_scp} ark,scp:{final_ark},{final_scp}"
        _run_cmd(cmd)

        # Read the matrix while the temporary directory still exists; the
        # scp holds exactly the one "file-0" entry written above.
        with final_scp.open('rb') as fp:
            feats = [features for _, features in kaldi_io.read_mat_scp(fp)][0]

    # Process data: keep every second row (presumably frames -- confirm),
    # then add a leading dimension of size 1 and cast to float32.
    data = torch.from_numpy(np.array(feats))[::2, :].unsqueeze(0).float()
    if self._ctx is not None:
        data = data.cuda(self._ctx)

    # All-False mask with the same shape and device as the input.
    padding_mask = (
        torch.BoolTensor(data.shape).fill_(False).to(data.device)
    )
    vecs = self._model(data, padding_mask).squeeze(0)
    reps = vecs.cpu().detach().numpy()
    return reps