in workload_generator/AIOB_simAI_workload_generator.py [0:0]
def get_model_details(self):
    """Collect a ``LayerInfo`` record for each tracked module in the model tree.

    The tree is walked depth-first via ``child_modules()``; a module is
    recorded before its children, and each module object is processed at
    most once (tracked by ``id``).

    Which module types are recorded depends on
    ``self.args.enable_sequence_parallel``:

    * enabled: ``MegatronColumnLinear``, ``MegatronRowLinear``,
      ``MegatronEmbedding``, ``FusedLayernorm``, and ``MOEMLP``;
    * disabled: ``MegatronAttention``, ``MegatronMlp``, ``MegatronEmbedding``.

    Returns:
        list: ``LayerInfo(layer_id, name, param_count)`` entries in visit
        order, where ``param_count`` is the sum of ``p.numel()`` over the
        module's ``parameters()``.

    NOTE(review): the walk starts from ``self.model`` when the instance has
    one. This assumes the owning class stores its model under that attribute
    -- confirm against ``__init__``. If the attribute is missing or ``None``,
    the module-level ``model`` name the original code relied on is used.
    """
    layers = []
    visited = set()

    def record(module):
        # Count parameters first, then read the identifying attributes.
        param_count = sum(p.numel() for p in module.parameters())
        layers.append(LayerInfo(module.layer_id, module.name, param_count))

    def traverse_model(model):
        if id(model) in visited:
            return
        visited.add(id(model))

        if self.args.enable_sequence_parallel:
            if isinstance(
                model,
                (
                    MegatronColumnLinear,
                    MegatronRowLinear,
                    MegatronEmbedding,
                    FusedLayernorm,
                ),
            ):
                record(model)
            # Deliberately a separate check: a module matching both groups
            # is recorded twice, exactly as before.
            if isinstance(model, MOEMLP):
                record(model)
        elif isinstance(model, (MegatronAttention, MegatronMlp, MegatronEmbedding)):
            record(model)

        for child in model.child_modules():
            traverse_model(child)

    # The original root call used a bare ``model``, which is not a local of
    # this method and so only worked when a module-level ``model`` happened to
    # exist. Prefer the instance's own model and keep the old lookup as a
    # fallback for backward compatibility.
    root = getattr(self, "model", None)
    if root is None:
        root = model  # noqa: F821 - legacy module-level global
    traverse_model(root)

    return layers