# eval_models()
#
# from luckmatter/recon_multilayer.py [0:0]


def eval_models(iter_num, loader, teacher, student, loss_func, args, init_corrs, init_student, active_nodes=None):
    """Evaluate the student network against the teacher at one training iteration.

    Computes the teacher/student output loss and per-layer node correlations,
    optionally prints H-matrix statistics (``args.stats_H``) and weight-delta
    statistics (``args.stats_w``), evaluates classification accuracy for
    non-gaussian datasets, and returns everything bundled in a stats dict.

    Args:
        iter_num: Current training iteration, used for logging and bookkeeping.
        loader: Data loader used to compute outputs and correlations.
        teacher: Reference (teacher) network.
        student: Network being trained and compared against the teacher.
        loss_func: Callable ``loss_func(teacher_y, student_y)`` returning a loss tensor.
        args: Config namespace; this function reads ``args.stats_H``,
            ``args.stats_w`` and ``args.dataset``.
        init_corrs: Correlations recorded at initialization, compared against
            the current correlations via ``compareCorrIndices``.
        init_student: Snapshot of the student at init, used for weight deltas.
        active_nodes: Optional node mask forwarded to ``print_corrs``.

    Returns:
        dict with keys ``iter_num``, ``accuracy``, ``loss``, ``corrs``
        (per-layer correlation matrices, transposed and moved to CPU), plus
        ``Hs``/``Hs_s``/``betas``/``betas_s`` when ``args.stats_H`` is set and
        ``delta_ws``/``delta_ws_rel``/``w_norms`` when ``args.stats_w`` is set.
    """
    delta_ws, delta_ws_rel, w_norms = compare_weights(student, init_student)

    corr, corr_indices, output_t, output_s = getCorrs(loader, teacher, student, args)
    t_std = output_t["y"].data.std()
    s_std = output_s["y"].data.std()

    err = loss_func(output_t["y"].data, output_s["y"].data)

    # pick_mats = corrIndices2pickMats(corr_indices)
    # Combined student nodes to form a teacher node.
    # Heuristic: a sharp softmax-like weighting — exp(100 * (c - rowmax)) is
    # ~1 for the best-matching student node per teacher node, ~0 otherwise.
    combined_mats = [ (100 * (c - c.max(dim=1,keepdim=True)[0])).exp() for c in corr ]

    stats = dict()
    verbose = False

    if args.stats_H:
        # H matrices between student/teacher (st) and student/student (ss).
        Hs_st, betas_st = compute_Hs(student, output_s, teacher, output_t)
        Hs_ss, betas_ss = compute_Hs(student, output_s, student, output_s)

        stats.update(dict(Hs=Hs_ss, Hs_s=Hs_st, betas=betas_ss, betas_s=betas_st))

        if verbose:
            with np.printoptions(precision=3, suppress=True, linewidth=120):
                layer = 0
                for H_st, H_ss in zip(Hs_st, Hs_ss):
                    m = combined_mats[layer]
                    # From bottom to top

                    '''
                    print(f"{layer}: H*: ")
                    alpha = H_st.sum(0)[pick_mat, :]
                    print(alpha.cpu().numpy())
                    print(f"{layer}: H: ")
                    beta = H_ss.sum(0)[:, pick_mat][pick_mat, :]
                    print(beta.cpu().numpy())
                    print(f"{layer}: alpha / beta: ")
                    print( (alpha / beta).cpu().numpy() )
                    '''

                    # Project student weights into the teacher's node basis.
                    W_s = m @ student.from_bottom_linear(layer)
                    if layer > 0:
                        W_s = W_s @ combined_mats[layer-1].t()
                    W_t = teacher.from_bottom_linear(layer)

                    print(f"{layer}: Student W (after renorm)")
                    # Student needs to be renormalized.
                    W_s /= W_s.norm(dim=1, keepdim=True) + 1e-5
                    print(W_s.cpu().numpy())
                    print(f"{layer}: Teacher W")
                    print(W_t.cpu().numpy())
                    # print(W_t.norm(dim=1))
                    print(f"{layer}: Teacher / Student W")
                    print( (W_t / (W_s + 1e-6)).cpu().numpy() )

                    layer += 1

                # Topmost (output) layer, handled outside the loop since it has
                # no combined matrix of its own.
                W_s = student.from_bottom_linear(layer) @ combined_mats[-1].t()
                W_t = teacher.from_bottom_linear(layer)

                print(f"{layer}: Final Student W (after renorm)")
                W_s /= W_s.norm(dim=1, keepdim=True) + 1e-5
                print(W_s.cpu().numpy())
                print(f"{layer}: Final Teacher W")
                print(W_t.cpu().numpy())
                # print(W_t.norm(dim=2))
                print(f"{layer}: Final Teacher / Student W")
                print( (W_t / (W_s + 1e-6)).cpu().numpy() )

    '''
    total_diff, stats = stats_from_rel(student, rels_st)
    total_diff_ss, stats_ss = stats_from_rel(student, rels_ss)
    with np.printoptions(precision=3, suppress=True):
        # print("Total diff: %s" % str(total_diff))
        print(stats["means"])
        # print("Total diff_ss: %s" % str(total_diff_ss))
        print(stats_ss["means"])
        #if last_total_diff is not None:
        #    percent = (total_diff - last_total_diff) / last_total_diff * 100
        #    print("Increment percent: %s" % str(percent) )
    last_total_diff = total_diff
    '''

    result = compareCorrIndices(init_corrs, corr_indices)
    print_corrs(result, active_nodes=active_nodes, first_n=5)

    accuracy = 0.0
    if args.dataset != "gaussian":
        accuracy = full_eval_cls(loader, student, args)

    # print("[%d] Err: %f. std: t=%.3f/s=%.3f, active_ratio: %s" % (iter_num, err.data.item(), t_std, s_std, ratio_str))
    print("[%d] Err: %f, accuracy: %f%%" % (iter_num, err.data.item(), accuracy))
    if verbose:
        ratio_str = ""
        for layer, (h_t, h_s) in enumerate(zip(output_t["hs"], output_s["hs"])):
            this_layer = []
            # for k, (h_tt, h_ss) in enumerate(zip(h_t, h_s)):
            for k in range(h_t.size(1)):
                h_tt = h_t[:, k]
                # Fraction of samples on which teacher node k is active (ReLU > 0).
                teacher_ratio = (h_tt.data > 0.0).sum().item() / h_tt.data.numel()
                # student_ratio = (h_ss.data > 0.0).sum().item() / h_ss.data.numel()
                # this_layer.append("[%d] t=%.2f%%/s=%.2f%%" % (k, teacher_ratio * 100.0, student_ratio * 100.0))
                this_layer.append("[%d]=%.2f%%" % (k, teacher_ratio * 100.0))

            student_ratio = (h_s.data > 1.0).sum().item() / h_s.data.numel()
            ratio_str += ("L%d" % layer) + ": " + ",".join(this_layer) + "; s=%.2f%% | " % (student_ratio * 100.0)

            # all_corrs.append([c.cpu().numpy() for c in corr])
            # all_weights.append(model2numpy(student))
            # all_activations.append(dict(t=activation2numpy(output_t), s=activation2numpy(output_s)))
        # BUG FIX: the original referenced an undefined name `i` here (only
        # bound later in the stats_w loop), raising NameError when verbose.
        print("[%d] std: t=%.3f/s=%.3f, active_ratio: %s" % (iter_num, t_std, s_std, ratio_str))

    if args.stats_w:
        for i, (delta_w, delta_w_rel, w_norm) in enumerate(zip(delta_ws, delta_ws_rel, w_norms)):
            print(f"[{i}]: delta_w: {get_stat(delta_w)} | delta_w_rel: {get_stat(delta_w_rel)} | w_norm: {get_stat(w_norm)}")

        stats.update(dict(delta_ws=delta_ws, delta_ws_rel=delta_ws_rel, w_norms=w_norms))

    stats.update(dict(iter_num=iter_num, accuracy=accuracy, loss=err.data.item(), corrs=[ c.t().cpu() for c in corr ]))

    return stats