def _audio_transform()

in tutorials/video_classification_example/train.py [0:0]


    def _audio_transform(self):
        """
        This function contains example transforms using both PyTorchVideo and TorchAudio
        in the same Callable.
        """
        args = self.args
        n_fft = int(
            float(args.audio_resampled_rate) / 1000 * args.audio_mel_window_size
        )
        hop_length = int(
            float(args.audio_resampled_rate) / 1000 * args.audio_mel_step_size
        )
        eps = 1e-10
        return ApplyTransformToKey(
            key="audio",
            transform=Compose(
                [
                    Resample(
                        orig_freq=args.audio_raw_sample_rate,
                        new_freq=args.audio_resampled_rate,
                    ),
                    MelSpectrogram(
                        sample_rate=args.audio_resampled_rate,
                        n_fft=n_fft,
                        hop_length=hop_length,
                        n_mels=args.audio_num_mels,
                        center=False,
                    ),
                    Lambda(lambda x: x.clamp(min=eps)),
                    Lambda(torch.log),
                    UniformTemporalSubsample(args.audio_mel_num_subsample),
                    Lambda(lambda x: x.transpose(1, 0)),  # (F, T) -> (T, F)
                    Lambda(
                        lambda x: x.view(1, x.size(0), 1, x.size(1))
                    ),  # (T, F) -> (1, T, 1, F)
                    Normalize((args.audio_logmel_mean,), (args.audio_logmel_std,)),
                ]
            ),
        )