in grok/metrics.py [0:0]
def calculate(trained_model, init_model, device, dataset_size, margin, input_dim):
    """
    Compute norm-based complexity measures of a trained model relative to its
    initialization, plus the generalization bounds built from those measures.

    Args:
        trained_model: model after training (deep-copied; never mutated).
        init_model: the same architecture at initialization.
        device: unused here; kept for interface compatibility.
        dataset_size: number of training samples (enters the bounds).
        margin: classification margin used to normalize every measure.
        input_dim: input dimensionality (enters the log term of `alpha`).

    Returns:
        (measure, bound): two dicts mapping measure/bound names to values.
        Bounds omit constant and additive logarithmic factors.
    """
    # Work on a copy so the caller's trained model cannot be altered.
    model = copy.deepcopy(trained_model)

    # Network depth and parameter count (batch-norm parameters excluded,
    # per the n_param helper).
    net_depth = compute_measure(model, init_model, depth, "sum", {})
    num_params = compute_measure(model, init_model, n_param, "sum", {})

    measure, bound = {}, {}
    with torch.no_grad():
        # Margin-normalized product of a per-layer quantity over all layers.
        def layer_product(fn, kwargs):
            return compute_measure(model, init_model, fn, "product", kwargs) / margin

        # --- Norm-based measures ---
        measure["L_{1,inf} norm"] = layer_product(norm, {"p": 1, "q": float("Inf")})
        measure["Frobenius norm"] = layer_product(norm, {"p": 2, "q": 2})
        measure["L_{3,1.5} norm"] = layer_product(norm, {"p": 3, "q": 1.5})
        measure["Spectral norm"] = layer_product(op_norm, {"p": float("Inf")})
        measure["L_1.5 operator norm"] = layer_product(op_norm, {"p": 1.5})
        measure["Trace norm"] = layer_product(op_norm, {"p": 1})

        # NOTE(review): L1 / L1.5 / L2 path-norm measures (lp_path_norm with
        # input_size = [context_len, emb_dim]) were present here but disabled;
        # restore them from history if path norms are needed again.

        # --- Generalization bounds (no constants / additive log factors) ---
        # Golowich et al. 2018, https://arxiv.org/pdf/1712.06541.pdf
        # NOTE(review): the leading "1 *" looks like a vestigial channel
        # count; it is kept because it does not change the value — confirm.
        alpha = math.sqrt(net_depth + math.log(1 * input_dim * input_dim))

        # Bartlett & Mendelson 2002
        bound["L1_max Bound"] = (
            alpha * measure["L_{1,inf} norm"] / math.sqrt(dataset_size)
        )
        # Neyshabur et al. 2015
        bound["Frobenius Bound"] = (
            alpha * measure["Frobenius norm"] / math.sqrt(dataset_size)
        )
        # Neyshabur et al. 2015
        bound["L_{3,1.5} Bound"] = (
            alpha * measure["L_{3,1.5} norm"] / (dataset_size ** (1 / 3))
        )

        beta = math.log(dataset_size) * math.log(num_params)

        # Aggregated distance-to-init / operator-norm ratio; exact semantics
        # live in h_dist_op_norm (presumably per-layer ||W - W0|| scaled by an
        # operator norm — verify against that helper).
        def distance_ratio(q, p):
            return compute_measure(
                model,
                init_model,
                h_dist_op_norm,
                "norm",
                {"p": 2, "q": q, "p_op": float("Inf")},
                p=p,
            )

        # Bartlett et al. 2017 — spectral L_{2,1} bound
        bound["Spec_L_{2,1} Bound"] = (
            beta
            * measure["Spectral norm"]
            * distance_ratio(1, 2 / 3)
            / math.sqrt(dataset_size)
        )
        # Neyshabur et al. 2018, https://arxiv.org/pdf/1706.08947.pdf
        # — spectral/Frobenius bound
        bound["Spec_Fro Bound"] = (
            net_depth
            * measure["Spectral norm"]
            * distance_ratio(2, 2)
            / math.sqrt(dataset_size)
        )

    return measure, bound