def add_parameter_update_ops()

in self_supervision_benchmark/modeling/model_builder.py

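Sets up solver bookkeeping (the learning rate blob and a CPU-side iteration counter) and returns a param_update_ops closure that, when called inside a device's scope, adds SGD-with-momentum update ops for every trainable parameter, with separate weight-decay handling for BN scale/bias params, bias params, and regular weights.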

# Imports used by this excerpt; the repo-internal module paths below are
# assumed from the project layout, not confirmed from the source file.
from caffe2.proto import caffe2_pb2
from caffe2.python import core, scope
from self_supervision_benchmark.core.config import config as cfg
from self_supervision_benchmark.utils import lr_utils


def add_parameter_update_ops(model):
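    # Create the learning rate blob consumed as 'lr' by the
    # MomentumSGDUpdate ops below.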
    lr_utils.create_learning_rate_blob()
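    # Keep a single iteration counter on the CPU, outside any GPU scope.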
    with core.NameScope("cpu"):
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
            model.Iter("ITER")

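    # Returned as a closure: the caller invokes it once per device, inside
    # that device's name/device scope, to add the actual update ops.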
    def param_update_ops(model):
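        # Scalar constant blobs used below as WeightedSum coefficients:
        # per-parameter-type weight decays, plus gradient multipliers
        # (ONE/TWO act as effective per-parameter LR multipliers).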
        weight_decay = model.param_init_net.ConstantFill(
            [], 'weight_decay', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY
        )
        weight_decay_bn = model.param_init_net.ConstantFill(
            [], 'weight_decay_bn', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY_BN
        )
        # for the jigsaw model, all the bias params have weight decay set to 0
        weight_decay_zero_bias = model.param_init_net.ConstantFill(
            [], 'weight_decay_zero_bias', shape=[1], value=0.0
        )
        zero = model.param_init_net.ConstantFill(
            [], "ZERO", shape=[1], value=0.0
        )
        one = model.param_init_net.ConstantFill(
            [], "ONE", shape=[1], value=1.0
        )
        two = model.param_init_net.ConstantFill(
            [], "TWO", shape=[1], value=2.0
        )
        params = model.GetParams()
        curr_scope = scope.CurrentNameScope()
        # scope is of format 'gpu_{}/'.format(device_id), so remove the separator
        trainable_params = model.TrainableParams(curr_scope[:-1])
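        # GetParams() returns every parameter blob in this name scope;
        # TrainableParams() keeps only those that have a gradient.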
        assert len(trainable_params) > 0, 'No trainable params found in model'
        for param in params:
            # only update trainable params
            if param in trainable_params:
                # the param grad is the summed gradient for the parameter across
                # all devices/hosts
                param_momentum = model.param_init_net.ConstantFill(
                    [param], param + '_momentum', value=0.0
                )
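                # Passing `param` as input makes the momentum buffer match
                # the parameter's shape, initialized to zero.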
                param_grad = model.param_to_grad[param]
                # add weight decay
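                # WeightedSum writes c1 * param_grad + c2 * param back into
                # param_grad, folding L2 weight decay into the gradient.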
                if '_bn' in str(param):
                    # Zero the gradient term so scale and bias keep their
                    # initial values (assumes WEIGHT_DECAY_BN is also 0).
                    # Scale/bias are the learnable parameters in BN; see
                    # Algorithm 1 of https://arxiv.org/pdf/1502.03167.pdf
                    if cfg.MODEL.BN_NO_SCALE_SHIFT:
                        model.WeightedSum(
                            [param_grad, zero, param, weight_decay_bn],
                            param_grad
                        )
                    else:
                        model.WeightedSum(
                            [param_grad, one, param, weight_decay_bn],
                            param_grad
                        )
                elif cfg.MODEL.NO_BIAS_DECAY:
                    # In the jigsaw model, all the bias params have decay=0
                    # and lr_multiplier=2; other params keep the normal decay.
                    if '_b' in str(param):
                        model.WeightedSum([
                            param_grad, two, param, weight_decay_zero_bias
                        ], param_grad)
                    else:
                        model.WeightedSum(
                            [param_grad, one, param, weight_decay], param_grad
                        )
                else:
                    model.WeightedSum(
                        [param_grad, one, param, weight_decay], param_grad
                    )
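                # In-place momentum SGD step: updates the gradient, the
                # momentum buffer, and the parameter using the 'lr' blob.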
                model.net.MomentumSGDUpdate(
                    [param_grad, param_momentum, 'lr', param],
                    [param_grad, param_momentum, param],
                    momentum=cfg.SOLVER.MOMENTUM, nesterov=cfg.SOLVER.NESTEROV,
                )
    return param_update_ops
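
For context, a minimal sketch of how the returned closure is typically consumed
by Caffe2's data-parallel trainer. The add_inputs and add_model_ops builders
are hypothetical placeholders, not functions from this file:

from caffe2.python import data_parallel_model, model_helper

train_model = model_helper.ModelHelper(name="trainer")
data_parallel_model.Parallelize_GPU(
    train_model,
    input_builder_fun=add_inputs,            # hypothetical: data/input ops
    forward_pass_builder_fun=add_model_ops,  # hypothetical: net + loss ops
    # Parallelize_GPU calls this builder once per device, inside that
    # device's name/device scope, which param_update_ops relies on.
    param_update_builder_fun=add_parameter_update_ops(train_model),
    devices=[0, 1],  # e.g. two GPUs
)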