in chatlearn/schedule/model_manager.py [0:0]
def find_model_packing_strategy(self, models, total_gpu):
    """Pack all models onto groups of ``total_gpu`` GPUs.

    Greedy first-fit-decreasing style packing: models are sorted
    (trainable first, then by GPU count, both descending) and each model
    either fills a whole pack by itself, completes a partially-filled
    pack exactly, joins the partially-filled pack with the smallest
    sufficient remaining capacity, or starts a new pack.

    e.g. given models A:8, B:4, C:4 and total_gpu 8,
    the pack strategy is [(A), (B, C)].

    Args:
        models: models to place; each must expose ``trainable`` and
            ``total_gpu`` attributes (assumed from usage below).
        total_gpu: number of GPUs shared by each pack.

    Returns:
        list of packs; each pack is a list of models that together
        occupy at most ``total_gpu`` GPUs.
    """
    if not models:
        return []
    sorted_models = sorted(models, key=lambda x: (x.trainable, x.total_gpu), reverse=True)
    # largest model must fit in one pack at all
    assert sorted_models[0].total_gpu <= total_gpu
    final_packs = []
    # key is the remaining (free) gpu count of each partially-filled pack
    unfinished_packs = defaultdict(list)
    for model in sorted_models:
        gpu = model.total_gpu
        if gpu == total_gpu:
            # model occupies a full pack by itself
            final_packs.append([model])
        elif gpu in unfinished_packs:
            # exact fit: this model completes an existing pack
            packs = unfinished_packs[gpu].pop(0)
            if not unfinished_packs[gpu]:
                unfinished_packs.pop(gpu)
            packs.append(model)
            final_packs.append(packs)
        else:
            near_gpus = [d for d in unfinished_packs if d > gpu]
            if near_gpus:
                # join the pack with the smallest sufficient free space
                near_gpu = min(near_gpus)
                packs = unfinished_packs[near_gpu].pop(0)
                # BUGFIX: clean up the *near_gpu* bucket we popped from
                # (the original checked `gpu`, which both left an empty
                # list under `near_gpu` — causing IndexError on a later
                # exact-fit lookup of that key — and, via defaultdict,
                # transiently created a spurious `gpu` entry).
                if not unfinished_packs[near_gpu]:
                    unfinished_packs.pop(near_gpu)
                packs.append(model)
                # re-file the pack under its reduced remaining capacity
                unfinished_packs[near_gpu - gpu].append(packs)
            else:
                # start a new pack and wait for more models to join
                unfinished_packs[total_gpu - gpu].append([model])
    # partially-filled packs that never got completed are final as-is
    for packs_list in unfinished_packs.values():
        if packs_list:
            final_packs.extend(packs_list)
    return final_packs