def shuffle()

in oss-torch-connector/osstorchconnector/oss_iterable_dataset.py [0:0]


    def shuffle(self, generator=None):
        """Split the dataset into variable-sized chunks and shuffle their order.

        The index range ``[0, self._dataset_size)`` is cut into consecutive
        ``(start, size)`` chunks. Each size is drawn from a normal distribution
        centred on ``self._chunk_size`` (standard deviation 10), truncated to an
        int, then clamped to at least 1 and to the number of items remaining.
        The chunks are randomly permuted and stored in ``self._chunks``.

        Both the chunk sizes and the permutation are drawn from ``generator``,
        so two calls with generators in the same state produce the same
        ``self._chunks``.

        Args:
            generator: Optional ``torch.Generator`` used as the source of
                randomness. When ``None``, a new generator is created and
                seeded with a random 64-bit value, which is logged at debug
                level.
        """
        if generator is None:
            seed = int(torch.empty((), dtype=torch.int64).random_().item())
            generator = torch.Generator()
            generator.manual_seed(seed)
            log.debug("OssIterableDataset shuffle seed: %d", seed)

        # Standard deviation of the random jitter applied to each chunk size.
        chunk_size_std = 10

        chunks = []
        index = 0
        while index < self._dataset_size:
            # Draw the jitter from ``generator`` rather than the global
            # ``random`` module so that the chunk boundaries, not only their
            # order, are determined by the generator state.
            jitter = torch.randn(1, generator=generator).item() * chunk_size_std
            chunk_size = min(
                max(1, int(self._chunk_size + jitter)),
                self._dataset_size - index,
            )
            chunks.append((index, chunk_size))
            index += chunk_size

        # SubsetRandomSampler yields the given (start, size) tuples in a random
        # permutation driven by ``generator``.
        random_sampler = torch.utils.data.SubsetRandomSampler(chunks, generator=generator)
        self._chunks = list(random_sampler)
        log.info("OssIterableDataset shuffle chunk indices, dataset size: %d, chunk num: %d",
                 self._dataset_size, len(self._chunks))