in self_supervision_benchmark/modeling/model_builder.py [0:0]
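# imports this snippet relies on (the repo-local paths for `cfg` and
# `lr_utils` are assumed from the package layout):
from caffe2.proto import caffe2_pb2
from caffe2.python import core, scope
from self_supervision_benchmark.core.config import config as cfg
from self_supervision_benchmark.utils import lr_utils
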
def add_parameter_update_ops(model):
    lr_utils.create_learning_rate_blob()
    # a single CPU-side iteration counter, shared by all devices
    with core.NameScope("cpu"):
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
            model.Iter("ITER")

    def param_update_ops(model):
        weight_decay = model.param_init_net.ConstantFill(
            [], 'weight_decay', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY
        )
        weight_decay_bn = model.param_init_net.ConstantFill(
            [], 'weight_decay_bn', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY_BN
        )
        # for the jigsaw model, all the bias params have weight decay set to 0
        weight_decay_zero_bias = model.param_init_net.ConstantFill(
            [], 'weight_decay_zero_bias', shape=[1], value=0.0
        )
        zero = model.param_init_net.ConstantFill(
            [], "ZERO", shape=[1], value=0.0
        )
        one = model.param_init_net.ConstantFill(
            [], "ONE", shape=[1], value=1.0
        )
        two = model.param_init_net.ConstantFill(
            [], "TWO", shape=[1], value=2.0
        )
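        # the scalar blobs above are WeightedSum coefficients: each call below
        # computes, in place,
        #   param_grad = lr_mult * param_grad + decay * param
        # i.e. an LR multiplier on the gradient plus L2 weight decay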
        params = model.GetParams()
        curr_scope = scope.CurrentNameScope()
        # the scope is of the form 'gpu_{}/'.format(device_id), so strip the
        # trailing separator
        trainable_params = model.TrainableParams(curr_scope[:-1])
        assert len(trainable_params) > 0, 'No trainable params found in model'
        for param in params:
            # only update trainable params
            if param in trainable_params:
                # momentum buffer, same shape as the param, initialized to zero
                param_momentum = model.param_init_net.ConstantFill(
                    [param], param + '_momentum', value=0.0
                )
                # the param grad is the summed gradient for the parameter
                # across all devices/hosts
                param_grad = model.param_to_grad[param]
                # add weight decay
                if '_bn' in str(param):
                    # scale/bias are the learnable parameters in BN; see
                    # Algorithm 1 of https://arxiv.org/pdf/1502.03167.pdf
                    # with BN_NO_SCALE_SHIFT, the LR multiplier is 0 (and
                    # WEIGHT_DECAY_BN is expected to be 0) so that scale and
                    # bias keep their initial values
                    if cfg.MODEL.BN_NO_SCALE_SHIFT:
                        model.WeightedSum(
                            [param_grad, zero, param, weight_decay_bn],
                            param_grad
                        )
                    else:
                        model.WeightedSum(
                            [param_grad, one, param, weight_decay_bn],
                            param_grad
                        )
                elif cfg.MODEL.NO_BIAS_DECAY:
                    # in the jigsaw model, all the bias params have decay=0
                    # and lr_multiplier=2
                    if '_b' in str(param):
                        model.WeightedSum([
                            param_grad, two, param, weight_decay_zero_bias
                        ], param_grad)
                else:
                    model.WeightedSum(
                        [param_grad, one, param, weight_decay], param_grad
                    )
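                # MomentumSGDUpdate applies the update in place; in the plain
                # (non-Nesterov) case this is roughly:
                #   param_momentum = momentum * param_momentum + lr * param_grad
                #   param -= param_momentum
                # where 'lr' is the learning-rate blob created by
                # lr_utils.create_learning_rate_blob()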
                model.net.MomentumSGDUpdate(
                    [param_grad, param_momentum, 'lr', param],
                    [param_grad, param_momentum, param],
                    momentum=cfg.SOLVER.MOMENTUM, nesterov=cfg.SOLVER.NESTEROV,
                )

    return param_update_ops
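
A minimal wiring sketch (assumed driver code, not part of this file): call
add_parameter_update_ops once, then run the returned closure inside each
device's name/device scope so scope.CurrentNameScope() resolves to 'gpu_{}/'.
The build_forward_backward helper and the two-GPU count are hypothetical.

from caffe2.python import model_helper, workspace

model = model_helper.ModelHelper(name='example_trainer')
build_forward_backward(model)  # hypothetical: adds forward ops and gradients
param_update_fn = add_parameter_update_ops(model)
for gpu_id in range(2):  # assumed two-GPU setup
    with core.NameScope('gpu_{}'.format(gpu_id)):
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, gpu_id)):
            param_update_fn(model)
workspace.RunNetOnce(model.param_init_net)
workspace.CreateNet(model.net)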