def __iter__()

in dpr_scale/utils/utils.py
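
This `__iter__` belongs to a node-aware distributed sampler: rather than striding every rank across the full dataset the way `torch.utils.data.DistributedSampler` does, it first assigns each node a contiguous chunk of indices and only then splits that chunk across the node's local replicas. The method relies on module-level `import math` and `import torch`, plus instance attributes (`num_samples`, `total_size`, `num_replicas_per_node`, `rank`, `shuffle`, `seed`, `epoch`, `drop_last`) set up by the surrounding class, which is not shown here.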


    def __iter__(self):
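        """Yield the dataset indices assigned to this replica.

        Pads (or truncates) the global index list to ``total_size``, carves
        out this node's contiguous chunk, optionally shuffles within the
        chunk, then strides by local rank so replicas on the same node get
        disjoint, equally sized shards.
        """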
        indices = list(range(len(self.dataset)))  # type: ignore

        if not self.drop_last:
            # add extra samples to make it evenly divisible
            padding_size = self.total_size - len(indices)
            if padding_size <= len(indices):
                indices += indices[:padding_size]
            else:
                indices += (indices * math.ceil(padding_size / len(indices)))[
                    :padding_size
                ]
        else:
            # remove tail of data to make it evenly divisible.
            indices = indices[: self.total_size]
        assert len(indices) == self.total_size

        # carve out this node's contiguous chunk of the padded index list
        chunk_size = self.num_samples * self.num_replicas_per_node
        node_rank = self.rank // self.num_replicas_per_node
        local_rank = self.rank % self.num_replicas_per_node
        start_idx = node_rank * chunk_size
        indices = indices[start_idx : start_idx + chunk_size]
        if self.shuffle:
            # deterministically shuffle within the node's chunk; seeding on
            # node_rank (not rank) gives every replica on the node the same order
            g = torch.Generator()
            g.manual_seed(self.seed + self.epoch + node_rank)
            shuffle_idx = torch.randperm(
                len(indices), generator=g
            ).tolist()  # type: ignore
            indices = [indices[idx] for idx in shuffle_idx]
        # stride over the chunk so each local replica gets a disjoint shard
        indices = indices[local_rank :: self.num_replicas_per_node]
        assert len(indices) == self.num_samples

        return iter(indices)
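
To make the two-level sharding concrete, here is a self-contained sketch of the same scheme. The function name `node_chunked_indices` and its arguments are assumptions for illustration, not the real API; in particular, the derivation of `num_samples` and `total_size` mimics what `DistributedSampler`-style constructors usually do, since the surrounding class is not shown here.

    # Standalone sketch of the same two-level sharding, for illustration only.
    # The function name and its arguments are assumptions, not the real API.
    import math

    import torch


    def node_chunked_indices(
        dataset_len, rank, world_size, num_replicas_per_node,
        shuffle=True, seed=0, epoch=0, drop_last=False,
    ):
        # Derive num_samples / total_size the way DistributedSampler-style
        # constructors usually do (an assumption here).
        if drop_last:
            num_samples = dataset_len // world_size
        else:
            num_samples = math.ceil(dataset_len / world_size)
        total_size = num_samples * world_size

        indices = list(range(dataset_len))
        if not drop_last:
            # pad by repeating indices from the front, as in __iter__ above
            padding_size = total_size - len(indices)
            if padding_size <= len(indices):
                indices += indices[:padding_size]
            else:
                indices += (indices * math.ceil(padding_size / len(indices)))[:padding_size]
        else:
            indices = indices[:total_size]

        # node-level contiguous chunk, then per-replica stride within it
        chunk_size = num_samples * num_replicas_per_node
        node_rank = rank // num_replicas_per_node
        local_rank = rank % num_replicas_per_node
        chunk = indices[node_rank * chunk_size : node_rank * chunk_size + chunk_size]
        if shuffle:
            g = torch.Generator()
            g.manual_seed(seed + epoch + node_rank)
            chunk = [chunk[i] for i in torch.randperm(len(chunk), generator=g).tolist()]
        return chunk[local_rank::num_replicas_per_node]


    # Two nodes with two GPUs each over a 10-example dataset: ranks 0-1 split
    # node 0's chunk (indices 0-5); ranks 2-3 split node 1's chunk (indices
    # 6-9 plus padded indices 0-1).
    for rank in range(4):
        print(rank, node_chunked_indices(10, rank, 4, 2, shuffle=False))

With `shuffle=False` the layout is easy to see: ranks 0 and 1 share indices 0-5 while ranks 2 and 3 share 6-9 plus the padding, and every rank ends up with exactly `num_samples` indices. One likely motivation for keeping each node's samples contiguous is on-disk locality: if consecutive examples live in the same shard or memory-mapped region, each node reads one contiguous slice of the data instead of scattering reads across the whole dataset.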