fast_grad_models.py [42:70]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        assert len(hidden_sizes) >= 2, "need at least input and output sizes"
        self.input_size = hidden_sizes[0]
        self.act = F.relu

        # The comprehension is empty when len(hidden_sizes) == 2, so no
        # special case is needed; nn.ModuleList (unlike a plain list)
        # also registers the layers with the module.
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(hidden_sizes[i], hidden_sizes[i + 1])
             for i in range(len(hidden_sizes) - 2)]
        )
        self.output_layer = nn.Linear(hidden_sizes[-2], hidden_sizes[-1])

    def forward(self, x):
        """
        Forward pass that also returns
        * the activations (H) and
        * the linear combinations (Z)
        of each layer, to be able to use the trick from [1].

        Args:
        - x : The inputs of the network
        Returns:
        - logits
        - activations at each layer (including the inputs)
        - linear combinations at each layer

        > [1] Ian Goodfellow,
        > "Efficient per-example gradient computations",
        > https://arxiv.org/pdf/1510.01799.pdf
        """
        x = x.view(-1, self.input_size)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
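
A hedged sketch of how the truncated forward pass could continue, following
the docstring's contract (logits, H, Z) and the attributes defined above;
the names `activations` and `linear_combs` are illustrative assumptions,
not taken from the file:

    def forward(self, x):
        x = x.view(-1, self.input_size)
        h = x
        activations = [x]    # H: the inputs count as the first activation
        linear_combs = []    # Z: pre-activation output of each layer
        for layer in self.hidden_layers:
            z = layer(h)     # linear combination of this layer
            linear_combs.append(z)
            h = self.act(z)  # activation of this layer
            activations.append(h)
        logits = self.output_layer(h)
        linear_combs.append(logits)  # the logits are the last Z
        return logits, activations, linear_combs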



models.py [36:46]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        assert len(hidden_sizes) >= 2, "need at least input and output sizes"
        self.input_size = hidden_sizes[0]
        self.act = F.relu

        # The comprehension is empty when len(hidden_sizes) == 2, so no
        # special case is needed; nn.ModuleList (unlike a plain list)
        # also registers the layers with the module.
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(hidden_sizes[i], hidden_sizes[i + 1])
             for i in range(len(hidden_sizes) - 2)]
        )
        self.output_layer = nn.Linear(hidden_sizes[-2], hidden_sizes[-1])

    def forward(self, x):
        x = x.view(-1, self.input_size)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
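
For context, a hedged sketch of how the trick from [1] consumes these
outputs: the per-example gradient of a linear layer's weight is the outer
product of the backpropagated pre-activation gradient (dL/dZ) with the
layer's input activation (H), so no per-example backward passes are needed.
The helper below is an illustrative assumption, not part of either file:

    import torch

    def per_example_weight_grads(model, x, y, loss_fn):
        # Assumes model.forward returns (logits, H, Z) as documented above.
        logits, H, Z = model(x)
        # A sum-reduced loss makes dL/dZ for example n depend only on
        # example n (a mean reduction just rescales everything by 1/N).
        loss = loss_fn(logits, y)
        z_grads = torch.autograd.grad(loss, Z)  # one dL/dZ per layer
        # Layer l, example n: z_grads[l][n] outer H[l][n], giving shape
        # (out_features, in_features); the bias grads are z_grads itself.
        return [torch.einsum('ni,nj->nij', gz, h)
                for gz, h in zip(z_grads, H)]

Per-example gradient norms then follow without a Python loop over the
batch, e.g. g.flatten(1).norm(dim=1) for each returned tensor.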



