in mask2former/modeling/backbone/swin.py [0:0]
def __init__(self, cfg, input_shape):
    """Set up the backbone from the ``cfg.MODEL.SWIN`` config node.

    The Swin hyper-parameters are read from ``cfg.MODEL.SWIN`` and handed
    to the parent constructor; afterwards the output-feature bookkeeping
    attributes (``_out_features``, ``_out_feature_strides``,
    ``_out_feature_channels``) are filled in.

    Args:
        cfg: Config object exposing a ``MODEL.SWIN`` node with the
            attributes read below.
        input_shape: Accepted but not used by this constructor; the
            number of input channels is fixed at 3 regardless.
    """
    swin_cfg = cfg.MODEL.SWIN

    # Arguments are forwarded positionally, in the order the parent
    # constructor receives them; only ``use_checkpoint`` goes by keyword.
    super().__init__(
        swin_cfg.PRETRAIN_IMG_SIZE,
        swin_cfg.PATCH_SIZE,
        3,  # in_chans -- hard-coded (presumably RGB input; confirm)
        swin_cfg.EMBED_DIM,
        swin_cfg.DEPTHS,
        swin_cfg.NUM_HEADS,
        swin_cfg.WINDOW_SIZE,
        swin_cfg.MLP_RATIO,
        swin_cfg.QKV_BIAS,
        swin_cfg.QK_SCALE,
        swin_cfg.DROP_RATE,
        swin_cfg.ATTN_DROP_RATE,
        swin_cfg.DROP_PATH_RATE,
        nn.LayerNorm,  # norm_layer -- not configurable here
        swin_cfg.APE,
        swin_cfg.PATCH_NORM,
        use_checkpoint=swin_cfg.USE_CHECKPOINT,
    )

    self._out_features = swin_cfg.OUT_FEATURES

    # One entry per stage name, in stage order.
    stage_names = ("res2", "res3", "res4", "res5")
    stage_strides = (4, 8, 16, 32)

    self._out_feature_strides = dict(zip(stage_names, stage_strides))
    # ``self.num_features`` is not assigned in this method -- presumably
    # populated by the parent constructor above; it is indexed per stage.
    self._out_feature_channels = {
        stage: self.num_features[idx] for idx, stage in enumerate(stage_names)
    }