def __init__()

in src/transformers/models/vidtr/vidtr_compact.py [0:0]


    def __init__(self, d_model=512, nhead=8,
                 num_encoder_layers=6,
                 dim_feedforward=2048,
                 dropout=0.1, activation="relu",
                 normalize_before=False, patch_size=(1, 16, 16),
                 in_channel=3, activity_num=157, temporal_size=16,
                 layer_pool=None,
                 number_of_keys=None):
        """Build the compact VidTr video transformer.

        Args:
            d_model (int): token embedding dimension.
            nhead (int): number of attention heads per encoder layer.
            num_encoder_layers (int): number of stacked encoder layers.
            dim_feedforward (int): hidden size of each layer's feed-forward MLP.
            dropout (float): dropout rate used inside the encoder layers.
            activation (str): activation name for the encoder feed-forward.
            normalize_before (bool): pre-norm vs. post-norm encoder layers;
                also controls whether a final LayerNorm is applied after the
                encoder stack.
            patch_size (tuple): (t, h, w) kernel/stride of the Conv3d patch stem.
            in_channel (int): number of input video channels.
            activity_num (int): number of output classes for the linear head.
            temporal_size (int): temporal length used to size the positional
                embedding.
            layer_pool (list[int] | None): indices of encoder layers that
                perform pooling; defaults to [1, 3, 5].
            number_of_keys (list[int] | None): number of keys kept by each
                pooling layer, aligned index-for-index with ``layer_pool``;
                defaults to [2, 2, 2].
        """
        super().__init__()

        # Fix: use None sentinels instead of mutable list defaults — a list
        # default is a single shared object across every call of __init__,
        # so mutation by any caller would leak into all later instances.
        if layer_pool is None:
            layer_pool = [1, 3, 5]
        if number_of_keys is None:
            number_of_keys = [2, 2, 2]

        self.temporal_size = temporal_size

        # Patch embedding: non-overlapping 3D patches -> d_model-dim tokens
        # (kernel == stride, so patches do not overlap).
        self.conv_stem = nn.Conv3d(in_channels=in_channel, out_channels=d_model, kernel_size=patch_size,
                                   stride=patch_size, bias=True)

        # Build the encoder stack; layers whose index appears in `layer_pool`
        # run in pooling mode with the matching `k` from `number_of_keys`.
        layer_list = []
        for i in range(num_encoder_layers):
            is_pool = i in layer_pool
            k = number_of_keys[layer_pool.index(i)] if is_pool else 0
            layer_list.append(TransformerEncoderLayer(d_model, nhead, dim_feedforward,
                                                      dropout, activation, normalize_before,
                                                      layer_index=i, pool=is_pool, k=k,
                                                      layer_pool=layer_pool,
                                                      number_of_keys=number_of_keys))
        encoder_layers = nn.ModuleList(layer_list)

        encoder_norm = nn.LayerNorm(d_model) if normalize_before else None
        self.encoder = TransformerEncoder(encoder_layers, num_encoder_layers, encoder_norm)

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead

        # Classification head: global average pool over (T, H, W), then linear.
        self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        self.fc = nn.Linear(in_features=d_model, out_features=activity_num, bias=True)

        # Learnable class token.
        # NOTE(review): self.cls and self.pos_embedding are created AFTER
        # _reset_parameters() above, and torch.Tensor(...) allocates
        # uninitialized memory — presumably these are overwritten when loading
        # pretrained weights; confirm, otherwise they start as garbage values.
        self.cls = nn.Parameter(torch.Tensor(1, 1, d_model))

        # Buffer for an externally loaded (e.g. ViT/"google") 2D positional
        # embedding over a 14x14 token grid plus the class token.
        self.pos_embedding_google = np.zeros((1, 14 * 14 + 1, d_model)).astype(np.float32)

        # Learnable spatio-temporal positional embedding: T * 14 * 14 patch
        # tokens + 1 class token. The 14x14 grid assumes 224px input with a
        # 16px spatial patch — TODO confirm against the forward pass.
        self.pos_embedding = nn.Parameter(torch.Tensor(1, self.temporal_size * 14 * 14 + 1, self.d_model))

        self.dropout = nn.Dropout(0.5)
        self.dp_pos = nn.Dropout(0.1)