in picotron/pipeline_parallel/pipeline_parallel.py [0:0]
# (pgm refers to the module-level import elsewhere in the file: import picotron.process_group_manager as pgm)
def distribute_layers(self, num_layers):
    """
    Distribute model layers across pipeline stages as evenly as possible.
    Returns the layer indices that should be processed by this GPU.
    """
    pp_world_size = pgm.process_group_manager.pp_world_size
    pp_rank = pgm.process_group_manager.pp_rank
    # Each rank gets num_layers // pp_world_size layers; the first
    # num_layers % pp_world_size ranks each take one extra layer.
    layers_per_gpu = [
        num_layers // pp_world_size + (1 if i < num_layers % pp_world_size else 0)
        for i in range(pp_world_size)
    ]
    # This rank's first layer index is the total layer count of all earlier ranks.
    start_layer = sum(layers_per_gpu[:pp_rank])
    return list(range(start_layer, start_layer + layers_per_gpu[pp_rank]))
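To see how the split behaves, here is a minimal standalone sketch (not part of picotron; the function name and parameters are hypothetical) that mirrors the same logic without needing the process group manager, so you can check which layers each pipeline rank would own:

# Standalone sketch of the same layer-partitioning logic, for illustration only.
def split_layers(num_layers, pp_world_size, pp_rank):
    # base layers per rank, plus one extra for the first `remainder` ranks
    base, remainder = divmod(num_layers, pp_world_size)
    layers_per_gpu = [base + (1 if i < remainder else 0) for i in range(pp_world_size)]
    start_layer = sum(layers_per_gpu[:pp_rank])
    return list(range(start_layer, start_layer + layers_per_gpu[pp_rank]))

# Example: 10 layers over 4 pipeline stages.
# rank 0 -> [0, 1, 2], rank 1 -> [3, 4, 5], rank 2 -> [6, 7], rank 3 -> [8, 9]
for rank in range(4):
    print(rank, split_layers(10, 4, rank))

Note that any remainder layers go to the lowest-numbered ranks, so earlier pipeline stages may hold one more layer than later ones.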