def __iter__()

in ultravox/data/datasets.py [0:0]


    def __iter__(self):
        """Yield usable samples from the underlying dataset, dropping bad or over-long ones.

        Each row of ``self._dataset`` is converted with ``self._get_sample``.
        A row is counted as "bad" and dropped when the conversion returns
        ``None`` or, if ``self._args.include_audio`` is set, when the sample
        has no audio or audio whose last dimension is empty. When
        ``self._args.max_audio_duration_secs`` is positive, samples whose
        audio lasts longer than that limit (last-dimension length divided by
        ``data_sample.SAMPLE_RATE``) are counted as "skipped" and dropped too.

        Every bad sample is reported with a warning. After the dataset has
        been iterated to the end, one summary line with the counters is logged
        at INFO level; a consumer that stops early never triggers the summary.

        Yields:
            The samples returned by ``self._get_sample`` that pass all checks.
        """
        actual_length = 0  # rows read from the dataset, dropped ones included
        skipped_samples = 0  # dropped only for exceeding the duration limit
        bad_samples = 0  # dropped because the sample or its audio is unusable
        for row in self._dataset:
            actual_length += 1
            sample = self._get_sample(row)
            if sample is None:
                # Diagnostics go through logging (not print) so they honour
                # the configured handlers, just like the summary below.
                logging.warning(
                    "Sample is None in dataset %s for row %s",
                    self._config.alias,
                    row,
                )
                bad_samples += 1
                continue

            if self._args.include_audio:
                if sample.audio is None:
                    logging.warning("Audio is None for sample %s", sample)
                    bad_samples += 1
                    continue
                if sample.audio.shape[-1] == 0:
                    logging.warning("Audio length is 0 for sample %s", sample)
                    bad_samples += 1
                    continue
                # A non-positive limit disables the duration filter entirely.
                if (
                    self._args.max_audio_duration_secs > 0
                    and sample.audio.shape[-1] / data_sample.SAMPLE_RATE
                    > self._args.max_audio_duration_secs
                ):
                    skipped_samples += 1
                    continue

            yield sample

        logging.info(
            f"Extracted {actual_length} samples from {self.name} (total: {len(self)}), removed {bad_samples} bad samples, and skipped {skipped_samples} samples for exceeding max audio duration ({self._args.max_audio_duration_secs}s)."
        )