def preprocessing()

in 01-byoc/code/dataset.py [0:0]


    def preprocessing(self, wav_data, sr):
        """Convert raw wav samples to a fixed-length log mel spectrogram.

        Steps:
            1. normalize the wav data according to ``self.normalize``
               ('peak', 'rms', or anything else for int16 full-scale)
            2. down-mix multi-channel data to mono
            3. resample to ``self.params.sr`` when ``sr`` differs
            4. compute the log mel spectrogram with librosa
            5. zero-pad or truncate along time to 500 frames

        Args:
            wav_data: An np.array indicating wav data, expected in np.int16
                datatype. Arrays with more than one dimension are averaged
                over axis 1 (presumably a (num_samples, num_channels)
                layout -- confirm against the caller).
            sr: An integer specifying the sampling rate of this wav data
        Return:
            inpt: A float32 np.array of shape (1, 500, self.params.mel)
                holding the log mel spectrogram of the data
        """
        num_frames = 500  # fixed number of time frames in the output

        # Work in floating point: squaring or taking abs() of int16 samples
        # silently wraps around (e.g. 200**2 or abs(-32768) overflow int16).
        samples = np.asarray(wav_data)
        if not np.issubdtype(samples.dtype, np.floating):
            samples = samples.astype(np.float64)

        # normalize samples
        if self.normalize == 'peak':
            # Use the absolute peak so a dominant negative excursion cannot
            # push samples outside [-1, 1] or flip the sign of the signal.
            peak = np.max(np.abs(samples))
            if peak > 0:  # all-zero (silent) input is left untouched
                samples = samples / peak
        elif self.normalize == 'rms':
            rms_level = 0
            target = 10 ** (rms_level / 10.0)
            energy = np.sum(samples ** 2)
            if energy > 0:  # avoid dividing by zero on silent input
                gain = np.sqrt((len(samples) * target ** 2) / energy)
                samples = samples * gain
        else:
            # scale by the int16 full-scale value
            samples = samples / 32768.0

        # convert samples to mono-channel
        if samples.ndim > 1:
            samples = np.mean(samples, axis=1)

        # resample samples to the configured sampling rate
        if sr != self.params.sr:
            samples = resampy.resample(samples, sr, self.params.sr)

        # transform samples to a log mel spectrogram, laid out (time, mel).
        # `y` is passed by keyword: librosa >= 0.10 no longer accepts the
        # audio as a positional argument.
        spec = librosa.feature.melspectrogram(
            y=samples,
            sr=self.params.sr,
            n_fft=self.params.nfft,
            hop_length=self.params.hop,
            n_mels=self.params.mel,
        )
        spec_db = librosa.power_to_db(spec).T

        # force exactly num_frames time steps: zero-pad short clips at the
        # end, truncate long ones
        missing = num_frames - spec_db.shape[0]
        if missing > 0:
            spec_db = np.pad(spec_db, ((0, missing), (0, 0)), mode='constant')
        else:
            spec_db = spec_db[:num_frames]

        # add a leading axis of size 1
        inpt = spec_db[np.newaxis, :, :]

        return inpt.astype('float32')