in vissl/utils/hydra_config.py [0:0]
def get_scaled_lr_scheduler(cfg, param_schedulers, scaled_lr):
    """
    Scale learning rate value for different Learning rate types. See infer_learning_rate()
    for how the scaled LR is calculated.

    Values changed for learning rate schedules:
    1. cosine:
        end_value = scaled_lr * (end_value / start_value)
        start_value = scaled_lr and
    2. multistep:
        gamma = values[1] / values[0]
        values = [scaled_lr * pow(gamma, idx) for idx in range(len(values))]
    3. step_with_fixed_gamma
        base_value = scaled_lr
    4. linear:
       end_value = scaled_lr
    5. inverse_sqrt:
       start_value = scaled_lr
    6. constant:
       value = scaled_lr
    7. composite:
        recursively call to scale each composition. If the composition consists of a linear
        schedule, we assume that a linear warmup is applied. If the linear warmup is
        applied, it's possible the warmup is not necessary if the global batch_size is smaller
        than the base_lr_batch_size and in that case, we remove the linear warmup from the
        schedule.

    Args:
        cfg: full config object; only forwarded to resolve_linear_schedule() for
            composite schedules that contain a linear warmup.
        param_schedulers: scheduler config (dict-like, mutated in place).
        scaled_lr: the pre-computed scaled learning rate value to apply.

    Returns:
        The same param_schedulers object, with its values scaled in place.

    Raises:
        RuntimeError: if the scheduler name is not one of the supported types.
    """
    name = param_schedulers["name"]
    if "cosine" in name:
        # Preserve the original start->end decay ratio at the new scale.
        start_value = param_schedulers["start_value"]
        end_value = param_schedulers["end_value"]
        decay_multiplier = end_value / start_value
        param_schedulers["start_value"] = float(scaled_lr)
        param_schedulers["end_value"] = float(scaled_lr * decay_multiplier)
    elif name in ("multistep", "step"):
        values = param_schedulers["values"]
        # Infer the per-step decay factor from the first two values;
        # a single-value schedule has no decay (gamma = 1.0).
        gamma = round(values[1] / values[0], 6) if len(values) > 1 else 1.0
        param_schedulers["values"] = [
            round(float(scaled_lr * pow(gamma, idx)), 8) for idx in range(len(values))
        ]
    elif name == "step_with_fixed_gamma":
        param_schedulers["base_value"] = scaled_lr
    elif name == "composite":
        has_linear_warmup = False
        for idx, scheduler in enumerate(param_schedulers["schedulers"]):
            if scheduler["name"] == "linear":
                has_linear_warmup = True
            # Recursively scale each sub-schedule in place.
            param_schedulers["schedulers"][idx] = get_scaled_lr_scheduler(
                cfg, scheduler, scaled_lr
            )
        # in case of composite LR schedule, if there's linear warmup specified,
        # we check if the warmup is meaningful or not. If not, we simplify the
        # schedule.
        if has_linear_warmup:
            resolve_linear_schedule(cfg, param_schedulers)
    elif name == "linear":
        param_schedulers["end_value"] = scaled_lr
    elif name == "inverse_sqrt":
        param_schedulers["start_value"] = scaled_lr
    elif name == "constant":
        param_schedulers["value"] = scaled_lr
    else:
        # Fixed typo in the error message: "Unknow" -> "Unknown".
        raise RuntimeError(
            f"Unknown param_scheduler: {name}. NOT scaling linearly"
        )
    return param_schedulers