def _file_to_feats()

in src/speech_reps/featurize.py [0:0]


    def _file_to_feats(self, file):

        assert file.suffix == '.wav'
        # To support CMVN files in the future
        cmvn_spec = None

        def _run_cmd(cmd):
            logging.warn("Running {}".format(cmd))
            try:
                check_call(cmd, shell=True, universal_newlines=True)
            except CalledProcessError as e:
                logging.error("Failed with code {}:".format(e.returncode))
                logging.error(e.output)
                raise e

        with TemporaryDirectory() as temp_dir:

            temp_dir = Path(temp_dir)

            # Create config placeholder
            conf_file = temp_dir / 'fbank.conf'
            conf_file.write_text('--num-mel-bins=80\n')

            # Create SCP placeholder
            input_scp = temp_dir / 'input.scp'
            input_scp.write_text('file-0 {}\n'.format(file))

            # Compute speech features
            feat_ark = temp_dir / "feat.ark"
            feat_scp = temp_dir / "feat.scp"
            cmd = f"compute-fbank-feats --config={conf_file} scp:{input_scp} ark,scp:{feat_ark},{feat_scp}"
            _run_cmd(cmd)

            cmvn_scp = temp_dir / "cmvn.scp"
            if cmvn_spec is not None:
                # If CMVN specifier is provided, we create a dummy scp
                cmvn_scp.write_text("file-0 {cmvn_spec}\n")
            else:
                # Compute CMVN stats
                cmvn_ark = temp_dir / "cmvn.ark"
                cmd = f"compute-cmvn-stats scp:{feat_scp} ark,scp:{cmvn_ark},{cmvn_scp}"
                _run_cmd(cmd)

            # Apply CMVN
            final_ark = temp_dir / "final.ark"
            final_scp = temp_dir / "final.scp"
            cmd = f"apply-cmvn --norm-vars=true scp:{cmvn_scp} scp:{feat_scp} ark,scp:{final_ark},{final_scp}"
            _run_cmd(cmd)

            with final_scp.open('rb') as fp:
                feats = [features for _, features in kaldi_io.read_mat_scp(fp)][0]

        # Process data
        # Turn the audio into a one-entry batch (TC --> TNC)
        data = torch.from_numpy(np.array(feats)).unsqueeze(0).float()
        if self._ctx is not None:
            data = data.cuda(self._ctx)
        padding_mask = (
            torch.BoolTensor(data.shape).fill_(False).to(data.device)
        )
        vecs = self._model(data, padding_mask).squeeze(0)
        reps = vecs.cpu().detach().numpy()

        return reps