luckmatter/model_gen.py [20:59]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def get_aug_w(w):
    """Return the affine map of linear layer `w` as one augmented matrix.

    `w` is a Linear-like module with weight [output_d, input_d] and bias
    [output_d]. The result has shape [output_d + 1, input_d + 1]: the top
    block is [weight | bias] and the bottom row is [0, ..., 0, 1], so that
    aug_w @ [x; 1] == [w(x); 1].
    """
    weight = w.weight.data
    bias = w.bias.data
    # Top block: weight with the bias appended as an extra column.
    top = torch.cat([weight, bias.unsqueeze(1)], dim=1)
    # Bottom row: all zeros except a trailing 1 (homogeneous coordinate).
    bottom = torch.zeros(1, top.size(1), dtype=weight.dtype, device=weight.device)
    bottom[0, -1] = 1
    return torch.cat([top, bottom], dim=0)

def set_orth(layer):
    """Initialize `layer.weight` from a random orthogonal matrix.

    Draws an input_d x input_d orthogonal matrix from the Haar measure
    (via the project-local `haar_measure`), truncates it to the weight's
    shape, and copies it in as float32. NOTE(review): hard-codes `.cuda()`,
    so this requires a GPU.
    """
    out_d, in_d = layer.weight.size()
    orth = haar_measure(in_d)
    block = orth[:out_d, :in_d].astype('f4')
    layer.weight.data = torch.from_numpy(block).cuda()

def set_add_noise(layer, teacher_layer, perturb):
    """Copy the teacher's parameters into `layer` plus Gaussian noise.

    layer, teacher_layer: Linear-like modules with matching weight/bias shapes.
    perturb: scale (std-dev) of the additive zero-mean Gaussian noise.

    Fix: use `randn_like` instead of `torch.randn(size).cuda()` so the noise
    is created on the teacher's own device and dtype — identical behavior for
    CUDA layers, and no longer crashes on CPU-only machines or CPU layers.
    """
    layer.weight.data[:] = teacher_layer.weight.data + torch.randn_like(teacher_layer.weight.data) * perturb
    layer.bias.data[:] = teacher_layer.bias.data + torch.randn_like(teacher_layer.bias.data) * perturb

def set_same_dir(layer, teacher_layer):
    """Point `layer` in the teacher's direction while keeping its own scale.

    Replaces the student's weight/bias with the teacher's, rescaled so the
    student's weight Frobenius norm is unchanged; the bias is scaled by the
    same ratio so the whole affine map is rescaled consistently.
    """
    scale = layer.weight.data.norm() / teacher_layer.weight.data.norm()
    layer.weight.data.copy_(teacher_layer.weight.data * scale)
    layer.bias.data.copy_(teacher_layer.bias.data * scale)

def set_same_sign(layer, teacher_layer):
    """Flip student entries whose sign disagrees with the teacher's.

    An entry is negated when teacher and student are both nonzero with
    opposite signs; entries where either side is zero are left untouched.
    Applies to both the weight and the bias.
    """
    for name in ("weight", "bias"):
        student = getattr(layer, name).data
        teacher = getattr(teacher_layer, name).data
        mismatch = ((teacher > 0) & (student < 0)) | ((teacher < 0) & (student > 0))
        student[mismatch] *= -1.0

def normalize_layer(layer):
    """Normalize each output row of `layer.weight` to (approximately) unit norm.

    Each row i is divided by (||w_i||_2 + 1e-5); the epsilon avoids division
    by zero on all-zero rows. The matching bias entry, if present, is divided
    by the same factor so the neuron's function is rescaled consistently.

    Fix: vectorized over rows instead of a per-row Python loop; the in-place
    result is numerically identical.
    """
    w = layer.weight.data  # [output_d, input_d]
    norms = w.norm(dim=1, keepdim=True) + 1e-5
    w /= norms
    if layer.bias is not None:
        layer.bias.data /= norms.squeeze(1)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



student_specialization/model_gen.py [14:53]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def get_aug_w(w):
    """Return the affine map of linear layer `w` as one augmented matrix.

    `w` is a Linear-like module with weight [output_d, input_d] and bias
    [output_d]. The result has shape [output_d + 1, input_d + 1]: the top
    block is [weight | bias] and the bottom row is [0, ..., 0, 1], so that
    aug_w @ [x; 1] == [w(x); 1].
    """
    weight = w.weight.data
    bias = w.bias.data
    # Top block: weight with the bias appended as an extra column.
    top = torch.cat([weight, bias.unsqueeze(1)], dim=1)
    # Bottom row: all zeros except a trailing 1 (homogeneous coordinate).
    bottom = torch.zeros(1, top.size(1), dtype=weight.dtype, device=weight.device)
    bottom[0, -1] = 1
    return torch.cat([top, bottom], dim=0)

def set_orth(layer):
    """Initialize `layer.weight` from a random orthogonal matrix.

    Draws an input_d x input_d orthogonal matrix from the Haar measure
    (via the project-local `haar_measure`), truncates it to the weight's
    shape, and copies it in as float32. NOTE(review): hard-codes `.cuda()`,
    so this requires a GPU.
    """
    out_d, in_d = layer.weight.size()
    orth = haar_measure(in_d)
    block = orth[:out_d, :in_d].astype('f4')
    layer.weight.data = torch.from_numpy(block).cuda()

def set_add_noise(layer, teacher_layer, perturb):
    """Copy the teacher's parameters into `layer` plus Gaussian noise.

    layer, teacher_layer: Linear-like modules with matching weight/bias shapes.
    perturb: scale (std-dev) of the additive zero-mean Gaussian noise.

    Fix: use `randn_like` instead of `torch.randn(size).cuda()` so the noise
    is created on the teacher's own device and dtype — identical behavior for
    CUDA layers, and no longer crashes on CPU-only machines or CPU layers.
    """
    layer.weight.data[:] = teacher_layer.weight.data + torch.randn_like(teacher_layer.weight.data) * perturb
    layer.bias.data[:] = teacher_layer.bias.data + torch.randn_like(teacher_layer.bias.data) * perturb

def set_same_dir(layer, teacher_layer):
    """Point `layer` in the teacher's direction while keeping its own scale.

    Replaces the student's weight/bias with the teacher's, rescaled so the
    student's weight Frobenius norm is unchanged; the bias is scaled by the
    same ratio so the whole affine map is rescaled consistently.
    """
    scale = layer.weight.data.norm() / teacher_layer.weight.data.norm()
    layer.weight.data.copy_(teacher_layer.weight.data * scale)
    layer.bias.data.copy_(teacher_layer.bias.data * scale)

def set_same_sign(layer, teacher_layer):
    """Flip student entries whose sign disagrees with the teacher's.

    An entry is negated when teacher and student are both nonzero with
    opposite signs; entries where either side is zero are left untouched.
    Applies to both the weight and the bias.
    """
    for name in ("weight", "bias"):
        student = getattr(layer, name).data
        teacher = getattr(teacher_layer, name).data
        mismatch = ((teacher > 0) & (student < 0)) | ((teacher < 0) & (student > 0))
        student[mismatch] *= -1.0

def normalize_layer(layer):
    """Normalize each output row of `layer.weight` to (approximately) unit norm.

    Each row i is divided by (||w_i||_2 + 1e-5); the epsilon avoids division
    by zero on all-zero rows. The matching bias entry, if present, is divided
    by the same factor so the neuron's function is rescaled consistently.

    Fix: vectorized over rows instead of a per-row Python loop; the in-place
    result is numerically identical.
    """
    w = layer.weight.data  # [output_d, input_d]
    norms = w.norm(dim=1, keepdim=True) + 1e-5
    w /= norms
    if layer.bias is not None:
        layer.bias.data /= norms.squeeze(1)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



