def get_files_for_processor()

in source_directory/training/training_script.py [0:0]


def get_files_for_processor(files):
    """Shard ``files`` across workers and report the smallest shard size.

    ``files`` is split into ``hvd.size()`` nearly equal parts with
    ``np.array_split``; the part at index ``hvd.rank()`` is this worker's
    share. Every worker then counts the records in *every* shard, so each
    one arrives at the same minimum without any inter-worker communication.

    Args:
        files: Sequence of TFRecord file paths to distribute.

    Returns:
        A ``(local_files, smallest_amount_samples)`` tuple: the list of file
        paths assigned to this worker, and the number of records in the
        smallest shard across all workers.
    """
    split_files = np.array_split(files, hvd.size())
    local_files = split_files[hvd.rank()].tolist()
    is_root = hvd.rank() == 0

    # Get the smallest amount of samples across all GPUs/processors.
    smallest_amount_samples = None
    for i, shard in enumerate(split_files):
        dataset = tf.data.TFRecordDataset(shard.tolist())
        # Counting is a full pass over the shard's records, so do it exactly
        # once and reuse the result for both the minimum and the log line.
        num_samples = sum(1 for _ in dataset)
        if smallest_amount_samples is None or num_samples < smallest_amount_samples:
            smallest_amount_samples = num_samples
        if is_root:
            print("Dataset {} has {} samples.".format(i, num_samples))

    if is_root:
        print("Smallest amount of samples is {}.".format(smallest_amount_samples))
    return local_files, smallest_amount_samples