def __init__()

in easycv/models/backbones/vision_transformer.py [0:0]


    def __init__(self,
                 img_size=[224],
                 patch_size=16,
                 in_chans=3,
                 num_classes=1000,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 qk_scale=None,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 norm_layer=partial(nn.LayerNorm, eps=1e-6),
                 global_pool=False,
                 use_layer_scale=False,
                 init_scale=1e-4,
                 hydra_attention=False,
                 hydra_attention_layers=None,
                 use_dpr_linspace=True,
                 **kwargs):
        super().__init__()

        if hydra_attention:
            if hydra_attention_layers is None:
                hydra_attention_layers = depth
            elif hydra_attention_layers > depth:
                raise ValueError(
                    'When using Hydra Attention, hydra_attention_layers must be smaller than or equal to depth.'
                )

        self.num_features = self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.mlp_ratio = mlp_ratio
        self.qkv_bias = qkv_bias
        self.qk_scale = qk_scale
        self.drop_rate = drop_rate
        self.attn_drop_rate = attn_drop_rate
        self.norm_layer = norm_layer
        self.use_layer_scale = use_layer_scale
        self.init_scale = init_scale
        self.hydra_attention = hydra_attention
        self.hydra_attention_layers = hydra_attention_layers
        self.drop_path_rate = drop_path_rate
        self.depth = depth

        self.patch_embed = PatchEmbed(
            img_size=img_size[0],
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches
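        # With the defaults img_size=[224] and patch_size=16 the image is cut
        # into a 14 x 14 grid, so num_patches = (224 // 16) ** 2 = 196.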

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(
            torch.zeros(1, num_patches + 1, embed_dim))  # +1 for the cls token
        self.pos_drop = nn.Dropout(p=drop_rate)

        if use_dpr_linspace:
            dpr = [
                x.item()
                for x in torch.linspace(0, self.drop_path_rate, self.depth)
            ]
        else:
            dpr = [drop_path_rate for _ in range(self.depth)]
        self.dpr = dpr
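        # self.dpr holds the per-block drop-path (stochastic depth) rates: with
        # use_dpr_linspace=True they ramp linearly from 0 in the first block to
        # drop_path_rate in the last (e.g. depth=12, drop_path_rate=0.1 gives
        # [0.0, 0.009, ..., 0.1]); otherwise every block uses drop_path_rate.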

        if self.hydra_attention:
            hy = [
                x >= (self.depth - self.hydra_attention_layers)
                for x in range(self.depth)
            ]
            head = [
                self.embed_dim if x >=
                (self.depth - self.hydra_attention_layers) else self.num_heads
                for x in range(self.depth)
            ]
        else:
            hy = [False for _ in range(self.depth)]
            head = [self.num_heads for _ in range(self.depth)]
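        # Example: with depth=12 and hydra_attention_layers=6, blocks 0-5 keep
        # standard multi-head attention (num_heads heads) while blocks 6-11 use
        # Hydra Attention with embed_dim heads, i.e. one head per channel.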
        self.blocks = nn.ModuleList([
            Block(
                dim=embed_dim,
                num_heads=head[i],
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=dpr[i],
                norm_layer=norm_layer,
                use_layer_scale=use_layer_scale,
                init_values=init_scale,
                hydra_attention=hy[i]) for i in range(depth)
        ])
        self.norm = norm_layer(embed_dim)

        # Classifier head
        self.head = nn.Linear(
            embed_dim, num_classes) if num_classes > 0 else nn.Identity()

        # Use global average pooling
        self.global_pool = global_pool
        if self.global_pool:
            self.fc_norm = norm_layer(embed_dim)
            self.norm = None
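
For reference, a minimal usage sketch. The enclosing class name (VisionTransformer) and the forward interface are assumed from the file name and the usual DINO-style ViT layout; neither is shown in this excerpt:

import torch

# Hypothetical instantiation: ViT-Base/16 defaults, with the last 6 of 12
# blocks switched to Hydra Attention and a linear drop-path schedule.
model = VisionTransformer(  # assumed class name for this __init__
    img_size=[224],
    patch_size=16,
    embed_dim=768,
    depth=12,
    num_heads=12,
    drop_path_rate=0.1,
    hydra_attention=True,
    hydra_attention_layers=6)

x = torch.randn(2, 3, 224, 224)  # batch of two 224x224 RGB images
# feats = model(x)               # forward signature not shown in this excerpt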