in modeling.py [0:0]
def _replace_batchnorm_with_groupnorm(model):
    """
    Replaces every `nn.BatchNorm2d` submodule of `model` by an `nn.GroupNorm`
    layer, in place.

    Each new layer uses `min(32, num_features)` groups over `num_features`
    channels, and is affine only if the layer it replaces had both a weight
    and a bias. The new layers are freshly initialized; no parameters are
    copied over from the batchnorm layers.
    """
    # snapshot the module list first, so that the module tree is not edited
    # while the named_modules() generator is still walking it:
    for qualified_name, module in list(model.named_modules()):
        if not isinstance(module, nn.BatchNorm2d):
            continue
        if not qualified_name:
            # the root module has no parent in which it could be replaced:
            continue

        # create groupnorm layer:
        new_module = nn.GroupNorm(
            min(32, module.num_features),
            module.num_features,
            affine=(module.weight is not None and module.bias is not None),
        )

        # walk down to the direct parent of the layer and replace the layer:
        *parent_path, leaf_name = qualified_name.split(".")
        parent = model
        for child_name in parent_path:
            parent = parent._modules[child_name]
        parent._modules[leaf_name] = new_module


def initialize_model(
    num_inputs: int,
    num_outputs: int,
    model: str = "linear",
    device: str = "cpu",
) -> nn.Module:
    """
    Initializes a model of the requested type and returns it.

    Args:
        num_inputs: Number of input features (used by the linear model).
        num_outputs: Number of outputs (used by the linear model).
        model: Either "linear" for a single `nn.Linear` layer, or the name of
            a constructor in the `resnet` module (any name starting with
            "resnet"). ResNets are built with their default arguments and
            have all `nn.BatchNorm2d` layers replaced by `nn.GroupNorm`.
        device: If "gpu", the model is moved to CUDA before it is returned.
            Any other value leaves the model where it was created.

    Returns:
        The initialized `nn.Module`.

    Raises:
        ValueError: If `model` does not name a known model.
        RuntimeError: If `device` is "gpu" but CUDA is not available.
    """
    # load model:
    model_name = model
    if model_name == "linear":
        net = nn.Linear(num_inputs, num_outputs)
    elif model_name.startswith("resnet"):
        # get a vanilla ResNet model; raise explicitly rather than assert, so
        # that the check is not stripped when running with `python -O`:
        constructor = getattr(resnet, model_name, None)
        if constructor is None:
            raise ValueError(f"Unknown model: {model_name}")
        net = constructor()
        # TODO: Add checks that number of inputs and outputs match.

        # replace all batchnorm layers by groupnorm layers:
        _replace_batchnorm_with_groupnorm(net)
    else:
        raise ValueError(f"Unknown model: {model_name}")

    # copy model to GPU(s) and return:
    if device == "gpu":
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA is not available on this machine.")
        logging.info("Copying model to GPU...")
        net.cuda()
    return net