def calc_gradnorm_per_sample()

in models/base.py


    def calc_gradnorm_per_sample(self, features, targets=None, temperature=1.):
        # Assumes `torch` and `torch.nn.functional as F` are imported at module level.
        # Scores a single sample: `features` must be a 1-D feature vector.
        assert len(features.shape) == 1
        self.requires_grad_(True)

        # Reshape to a batch of one and enable gradient tracking on the input features.
        features = features.view(1, -1).cuda().detach().requires_grad_(True)
        self.zero_grad()
        outputs = self.forward_classifier(features) / temperature

        # Without explicit targets, fall back to a uniform distribution over classes.
        if targets is None:
            targets = torch.ones((1, self.num_classes)).cuda() / self.num_classes

        # KL divergence between the model's softmax output and the (softened) targets.
        kl_loss = F.kl_div(outputs.softmax(dim=-1).log(), targets.softmax(dim=-1), reduction='sum')
        kl_loss.backward()

        # GradNorm score: L1 norm of the gradient on the final linear layer's weights.
        layer_grad = self.linear.weight.grad.data
        gradnorm = torch.sum(torch.abs(layer_grad))

        self.requires_grad_(False)

        return gradnorm
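
The method returns a scalar GradNorm score for one feature vector: the L1 norm of the gradient that the KL divergence between the temperature-scaled softmax output and a (by default uniform) target distribution induces on the final linear layer's weights. Below is a minimal usage sketch; `forward_features`, the CIFAR-like input shape, and the per-sample loop are illustrative assumptions and not part of models/base.py.

    # Hypothetical usage: score each sample in a small batch independently.
    # `forward_features` and the input shape are assumed for illustration;
    # the method itself only requires a 1-D feature vector per sample.
    import torch

    model = model.cuda().eval()
    inputs = torch.randn(8, 3, 32, 32).cuda()

    with torch.no_grad():
        feats = model.forward_features(inputs)   # assumed to return (N, feature_dim)

    scores = torch.stack([
        model.calc_gradnorm_per_sample(f, temperature=1.0) for f in feats
    ])

In GradNorm-style out-of-distribution scoring, higher values typically correspond to in-distribution samples and lower values to out-of-distribution samples.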