# Function: get_scaled_lr_scheduler()
# Source location: vissl/utils/hydra_config.py [0:0]

def get_scaled_lr_scheduler(cfg, param_schedulers, scaled_lr):
    """
    Scale learning rate value for different Learning rate types. See infer_learning_rate()
    for how the scaled LR is calculated.

    Values changed for learning rate schedules:
    1. cosine (any scheduler whose name contains "cosine"):
        start_value = scaled_lr
        end_value = scaled_lr * (end_value / start_value)
    2. multistep / step:
        gamma = values[1] / values[0]
        values = [scaled_lr * pow(gamma, idx) for idx in range(len(values))]
    3. step_with_fixed_gamma:
        base_value = scaled_lr
    4. linear:
       end_value = scaled_lr
    5. inverse_sqrt:
       start_value = scaled_lr
    6. constant:
       value = scaled_lr
    7. composite:
        recursively call to scale each composition. If the composition consists of a linear
        schedule, we assume that a linear warmup is applied. If the linear warmup is
        applied, it's possible the warmup is not necessary if the global batch_size is smaller
        than the base_lr_batch_size and in that case, we remove the linear warmup from the
        schedule.

    Args:
        cfg: full config object, forwarded to recursive calls and to
            resolve_linear_schedule() for composite schedules.
        param_schedulers: dict-like scheduler config; mutated in place.
        scaled_lr: the already-scaled learning rate value to apply.

    Returns:
        The same param_schedulers object, mutated with scaled values.

    Raises:
        RuntimeError: if the scheduler name is not one of the supported types.
    """
    name = param_schedulers["name"]
    if "cosine" in name:
        # Preserve the original decay ratio while rebasing on the scaled LR.
        start_value = param_schedulers["start_value"]
        end_value = param_schedulers["end_value"]
        decay_multiplier = end_value / start_value
        param_schedulers["start_value"] = float(scaled_lr)
        param_schedulers["end_value"] = float(scaled_lr * decay_multiplier)
    elif name in ("multistep", "step"):
        values = param_schedulers["values"]
        # Infer the per-step decay factor from the first two values; a
        # single-value schedule has no decay (gamma stays 1.0).
        gamma = round(values[1] / values[0], 6) if len(values) > 1 else 1.0
        param_schedulers["values"] = [
            round(float(scaled_lr * pow(gamma, idx)), 8)
            for idx in range(len(values))
        ]
    elif name == "step_with_fixed_gamma":
        param_schedulers["base_value"] = scaled_lr
    elif name == "composite":
        has_linear_warmup = False
        for idx, sub_scheduler in enumerate(param_schedulers["schedulers"]):
            if sub_scheduler["name"] == "linear":
                has_linear_warmup = True
            param_schedulers["schedulers"][idx] = get_scaled_lr_scheduler(
                cfg, sub_scheduler, scaled_lr
            )
        # in case of composite LR schedule, if there's linear warmup specified,
        # we check if the warmup is meaningful or not. If not, we simplify the
        # schedule.
        if has_linear_warmup:
            resolve_linear_schedule(cfg, param_schedulers)
    elif name == "linear":
        param_schedulers["end_value"] = scaled_lr
    elif name == "inverse_sqrt":
        param_schedulers["start_value"] = scaled_lr
    elif name == "constant":
        param_schedulers["value"] = scaled_lr
    else:
        raise RuntimeError(
            f"Unknown param_scheduler: {name}. NOT scaling linearly"
        )
    return param_schedulers