def distribute_layers()

in picotron/pipeline_parallel/pipeline_parallel.py


    def distribute_layers(self, num_layers):
        """
        Distribute model layers across GPUs as evenly as possible.
        Returns the layer indices that should be processed by this GPU.
        """
        pp_world_size = pgm.process_group_manager.pp_world_size
        pp_rank = pgm.process_group_manager.pp_rank
        # Base share for every stage, with the remainder handed out one
        # layer each to the first (num_layers % pp_world_size) stages.
        layers_per_gpu = [
            num_layers // pp_world_size + (1 if i < num_layers % pp_world_size else 0)
            for i in range(pp_world_size)
        ]
        # This stage's first layer sits right after all layers owned by earlier stages.
        start_layer = sum(layers_per_gpu[:pp_rank])
        return list(range(start_layer, start_layer + layers_per_gpu[pp_rank]))
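To see the split concretely, here is a minimal standalone sketch of the same arithmetic; `split_layers` is a hypothetical helper (not part of picotron) that takes the world size and rank as arguments instead of reading them from `pgm.process_group_manager`:

    def split_layers(num_layers, pp_world_size, pp_rank):
        # Same scheme as above: base share plus one extra layer for the
        # first (num_layers % pp_world_size) stages.
        layers_per_gpu = [
            num_layers // pp_world_size + (1 if i < num_layers % pp_world_size else 0)
            for i in range(pp_world_size)
        ]
        start_layer = sum(layers_per_gpu[:pp_rank])
        return list(range(start_layer, start_layer + layers_per_gpu[pp_rank]))

    # 13 layers over 4 stages -> per-stage sizes [4, 3, 3, 3]
    for rank in range(4):
        print(rank, split_layers(13, 4, rank))
    # 0 [0, 1, 2, 3]
    # 1 [4, 5, 6]
    # 2 [7, 8, 9]
    # 3 [10, 11, 12]

Every layer index appears exactly once across ranks, and stage sizes differ by at most one, which keeps the pipeline stages balanced.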