def __call__()

in datasets/preprocessing.py [0:0]


    def __call__(self, sig, sr, duration=None):
        """Turn a raw audio signal into a mono float32 signal of fixed layout.

        Args:
            sig: NumPy array of audio samples. Multi-dimensional input is
                averaged over axis 0 (the channel axis); 1-D input is treated
                as already mono. May be ``None`` when
                ``self.missing_as_zero`` is set, in which case an all-zero
                signal of ``duration * sr`` frames is substituted.
            sr: Sample rate. Used to convert ``duration`` into a frame count
                and returned unchanged.
            duration: Target length, in the time unit matching ``sr``
                (seconds if ``sr`` is in Hz). Falls back to ``self.duration``
                when ``None``.

        Returns:
            Tuple ``(sig, sr)``. ``sig`` is float32 with a new leading axis
            of size 1; for 1-D or 2-D input with ``self.trim_pad`` set its
            shape is ``(1, int(duration * sr))``. It is a ``torch.Tensor``
            when ``self.to_tensor`` is set, otherwise a NumPy array.

        Raises:
            AttributeError: If ``sig`` is ``None`` and
                ``self.missing_as_zero`` is not set.
        """
        if duration is None:
            duration = self.duration
        # NOTE: int() truncates, so a product that lands just below a whole
        # number because of float rounding yields one frame fewer.
        num_frames = int(duration * sr)

        # Substitute silence for a missing signal when configured to do so.
        if self.missing_as_zero and sig is None:
            sig = np.zeros((1, num_frames), dtype=np.float32)

        # Downmix to mono. A 1-D signal is already mono: averaging it over
        # axis 0 would collapse every sample into a single scalar.
        # astype() returns a copy, so the in-place gain applied further down
        # never modifies the caller's array.
        if sig.ndim == 1:
            sig = sig.astype(np.float32)
        else:
            sig = sig.mean(0).astype(np.float32)

        # Trim or zero-pad at the end to a constant number of frames.
        if self.trim_pad:
            n_pad = num_frames - sig.shape[0]
            if n_pad < 0:
                sig = sig[:num_frames]
            elif n_pad > 0:
                sig = np.pad(sig, (0, n_pad), mode='constant', constant_values=(0., 0.))

        # Augment with a random gain drawn from [1 - volume, 1 + volume].
        if self.augment:
            sig *= random.uniform(1. - self.volume, 1. + self.volume)

        # Restore a leading axis of size 1 in front of the frame axis.
        sig = sig[np.newaxis]
        if self.to_tensor:
            sig = torch.from_numpy(sig)

        return sig, sr