def convert()

in pytorchvideo/layers/accelerator/mobile_cpu/attention.py


    def convert(self, input_blob_size, **kwargs):
        """
        Converts into efficient version of squeeze-excite (SE) for CPU.
        It changes conv in original SE into linear layer (better supported by CPU).
        """
        if self.is_3d:
            avg_pool = nn.AdaptiveAvgPool3d(1)
        else:
            avg_pool = nn.AdaptiveAvgPool2d(1)
        """
        Reshape tensor size to (B, C) for linear layer.
        """
        reshape0 = _Reshape((input_blob_size[0], input_blob_size[1]))
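        # The first conv of the original SE becomes an equivalent nn.Linear; its
        # conv weight (all kernel dims equal to 1) is squeezed to (C_out, C_in) below.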
        fc0 = nn.Linear(
            self.se.block[0].in_channels,
            self.se.block[0].out_channels,
            bias=(self.se.block[0].bias is not None),
        )
        state_dict_fc0 = deepcopy(self.se.block[0].state_dict())
        state_dict_fc0["weight"] = state_dict_fc0["weight"].squeeze()
        fc0.load_state_dict(state_dict_fc0)
        activation = deepcopy(self.se.block[1])
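        # The second conv in the SE block is converted to a linear layer the same way.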
        fc1 = nn.Linear(
            self.se.block[2].in_channels,
            self.se.block[2].out_channels,
            bias=(self.se.block[2].bias is not None),
        )
        state_dict_fc1 = deepcopy(self.se.block[2].state_dict())
        state_dict_fc1["weight"] = state_dict_fc1["weight"].squeeze()
        fc1.load_state_dict(state_dict_fc1)
        sigmoid = deepcopy(self.se.block[3])
        """
        Output of linear layer has output shape of (B, C). Need to reshape to proper
        shape before multiplying with input tensor.
        """
        reshape_size_after_sigmoid = (input_blob_size[0], input_blob_size[1], 1, 1) + (
            (1,) if self.is_3d else ()
        )
        reshape1 = _Reshape(reshape_size_after_sigmoid)
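        # Assemble the converted SE branch:
        # pool -> flatten -> linear -> activation -> linear -> sigmoid -> reshape.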
        se_layers = nn.Sequential(
            avg_pool, reshape0, fc0, activation, fc1, sigmoid, reshape1
        )
        # Add final elementwise multiplication and replace self.se
        self.se = _SkipConnectMul(se_layers)
        self.convert_flag = True
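
For reference, a minimal sketch (not part of attention.py; names and shapes are
illustrative) of why a kernel-size-1 conv weight can be squeezed and loaded into
nn.Linear: on the (B, C, 1, 1) tensor produced by the adaptive average pool, a
1x1 Conv2d and a Linear layer compute the same per-channel affine map.

    # Standalone sketch, assuming a 2D SE branch with kernel_size=1 convs.
    import torch
    import torch.nn as nn

    B, C_in, C_out = 2, 16, 4
    conv = nn.Conv2d(C_in, C_out, kernel_size=1, bias=True)

    # Same conversion as in convert(): squeeze the (C_out, C_in, 1, 1) conv
    # weight to (C_out, C_in) and load it into an nn.Linear of matching size.
    fc = nn.Linear(C_in, C_out, bias=True)
    state_dict = conv.state_dict()
    state_dict["weight"] = state_dict["weight"].squeeze()
    fc.load_state_dict(state_dict)

    x = torch.randn(B, C_in, 1, 1)           # pooled SE input, as after avg_pool
    out_conv = conv(x).flatten(1)             # (B, C_out)
    out_fc = fc(x.flatten(1))                 # (B, C_out)
    print(torch.allclose(out_conv, out_fc, atol=1e-6))  # True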