in luckmatter/recon_multilayer.py [0:0]
def eval_models(iter_num, loader, teacher, student, loss_func, args, init_corrs, init_student, active_nodes=None):
    """Evaluate the student network against the teacher and collect statistics.

    Compares student weights against their initialization, computes
    teacher/student output correlations, optionally prints H-matrix and
    weight-delta diagnostics, and returns everything in a stats dict.

    Args:
        iter_num: current iteration number (used for logging and stored in stats).
        loader: data loader passed to getCorrs / full_eval_cls.
        teacher: teacher model (must support from_bottom_linear(layer)).
        student: student model (must support from_bottom_linear(layer)).
        loss_func: loss comparing teacher and student outputs.
        args: config namespace; reads args.stats_H, args.stats_w, args.dataset.
        init_corrs: correlation indices at initialization, for compareCorrIndices.
        init_student: snapshot of the student at init, for compare_weights.
        active_nodes: optional node mask forwarded to print_corrs.

    Returns:
        dict with iter_num, accuracy, loss, corrs, and (depending on flags)
        H-matrix stats (Hs, Hs_s, betas, betas_s) and weight-delta stats
        (delta_ws, delta_ws_rel, w_norms).
    """
    delta_ws, delta_ws_rel, w_norms = compare_weights(student, init_student)
    corr, corr_indices, output_t, output_s = getCorrs(loader, teacher, student, args)

    t_std = output_t["y"].data.std()
    s_std = output_s["y"].data.std()

    err = loss_func(output_t["y"].data, output_s["y"].data)

    # pick_mats = corrIndices2pickMats(corr_indices)

    # Combined student nodes to form a teacher node.
    # Heuristic soft-assignment: exp(100 * (c - rowmax)) sharply peaks at the
    # best-correlated student node per teacher node (rowmax entry maps to 1).
    combined_mats = [ (100 * (c - c.max(dim=1, keepdim=True)[0])).exp() for c in corr ]

    stats = dict()

    verbose = False  # flip on manually for per-layer weight dumps below

    if args.stats_H:
        # H stats of student vs. teacher and student vs. itself.
        Hs_st, betas_st = compute_Hs(student, output_s, teacher, output_t)
        Hs_ss, betas_ss = compute_Hs(student, output_s, student, output_s)
        stats.update(dict(Hs=Hs_ss, Hs_s=Hs_st, betas=betas_ss, betas_s=betas_st))

        if verbose:
            with np.printoptions(precision=3, suppress=True, linewidth=120):
                layer = 0
                for H_st, H_ss in zip(Hs_st, Hs_ss):
                    m = combined_mats[layer]
                    # From bottom to top
                    '''
                    print(f"{layer}: H*: ")
                    alpha = H_st.sum(0)[pick_mat, :]
                    print(alpha.cpu().numpy())

                    print(f"{layer}: H: ")
                    beta = H_ss.sum(0)[:, pick_mat][pick_mat, :]
                    print(beta.cpu().numpy())

                    print(f"{layer}: alpha / beta: ")
                    print( (alpha / beta).cpu().numpy() )
                    '''
                    # Project student weights through the soft-assignment so
                    # they line up with the teacher's node ordering.
                    W_s = m @ student.from_bottom_linear(layer)
                    if layer > 0:
                        W_s = W_s @ combined_mats[layer - 1].t()
                    W_t = teacher.from_bottom_linear(layer)

                    print(f"{layer}: Student W (after renorm)")
                    # Student needs to be renormalized (teacher/student weight
                    # scales differ); epsilon guards against zero-norm rows.
                    W_s /= W_s.norm(dim=1, keepdim=True) + 1e-5
                    print(W_s.cpu().numpy())
                    print(f"{layer}: Teacher W")
                    print(W_t.cpu().numpy())
                    # print(W_t.norm(dim=1))
                    print(f"{layer}: Teacher / Student W")
                    print( (W_t / (W_s + 1e-6)).cpu().numpy() )

                    layer += 1

                # Topmost (output) layer, mapped through the last assignment.
                W_s = student.from_bottom_linear(layer) @ combined_mats[-1].t()
                W_t = teacher.from_bottom_linear(layer)

                print(f"{layer}: Final Student W (after renorm)")
                W_s /= W_s.norm(dim=1, keepdim=True) + 1e-5
                print(W_s.cpu().numpy())
                print(f"{layer}: Final Teacher W")
                print(W_t.cpu().numpy())
                # print(W_t.norm(dim=2))
                print(f"{layer}: Final Teacher / Student W")
                print( (W_t / (W_s + 1e-6)).cpu().numpy() )

        '''
        total_diff, stats = stats_from_rel(student, rels_st)
        total_diff_ss, stats_ss = stats_from_rel(student, rels_ss)

        with np.printoptions(precision=3, suppress=True):
            # print("Total diff: %s" % str(total_diff))
            print(stats["means"])
            # print("Total diff_ss: %s" % str(total_diff_ss))
            print(stats_ss["means"])

        #if last_total_diff is not None:
        #    percent = (total_diff - last_total_diff) / last_total_diff * 100
        #    print("Increment percent: %s" % str(percent) )

        last_total_diff = total_diff
        '''

    result = compareCorrIndices(init_corrs, corr_indices)
    print_corrs(result, active_nodes=active_nodes, first_n=5)

    accuracy = 0.0
    if args.dataset != "gaussian":
        accuracy = full_eval_cls(loader, student, args)

    # print("[%d] Err: %f. std: t=%.3f/s=%.3f, active_ratio: %s" % (iter_num, err.data.item(), t_std, s_std, ratio_str))
    print("[%d] Err: %f, accuracy: %f%%" % (iter_num, err.data.item(), accuracy))

    if verbose:
        ratio_str = ""
        for layer, (h_t, h_s) in enumerate(zip(output_t["hs"], output_s["hs"])):
            this_layer = []
            # for k, (h_tt, h_ss) in enumerate(zip(h_t, h_s)):
            for k in range(h_t.size(1)):
                h_tt = h_t[:, k]
                # Fraction of samples for which teacher node k is active.
                teacher_ratio = (h_tt.data > 0.0).sum().item() / h_tt.data.numel()
                # student_ratio = (h_ss.data > 0.0).sum().item() / h_ss.data.numel()
                # this_layer.append("[%d] t=%.2f%%/s=%.2f%%" % (k, teacher_ratio * 100.0, student_ratio * 100.0))
                this_layer.append("[%d]=%.2f%%" % (k, teacher_ratio * 100.0))

            # NOTE(review): student uses a > 1.0 threshold while teacher uses
            # > 0.0 — looks intentional but worth confirming.
            student_ratio = (h_s.data > 1.0).sum().item() / h_s.data.numel()

            ratio_str += ("L%d" % layer) + ": " + ",".join(this_layer) + "; s=%.2f%% | " % (student_ratio * 100.0)

        # all_corrs.append([c.cpu().numpy() for c in corr])
        # all_weights.append(model2numpy(student))
        # all_activations.append(dict(t=activation2numpy(output_t), s=activation2numpy(output_s)))

        # BUGFIX: was `i`, which is undefined at this point (NameError when
        # verbose=True); the intended value is the current iteration number.
        print("[%d] std: t=%.3f/s=%.3f, active_ratio: %s" % (iter_num, t_std, s_std, ratio_str))

    if args.stats_w:
        for i, (delta_w, delta_w_rel, w_norm) in enumerate(zip(delta_ws, delta_ws_rel, w_norms)):
            print(f"[{i}]: delta_w: {get_stat(delta_w)} | delta_w_rel: {get_stat(delta_w_rel)} | w_norm: {get_stat(w_norm)}")
        stats.update(dict(delta_ws=delta_ws, delta_ws_rel=delta_ws_rel, w_norms=w_norms))

    stats.update(dict(iter_num=iter_num, accuracy=accuracy, loss=err.data.item(), corrs=[ c.t().cpu() for c in corr ]))
    return stats