in cvnets/models/classification/mobilevit.py [0:0]
def __init__(self, opts, *args, **kwargs) -> None:
    """Assemble the MobileViT classification network.

    The network is built in this order: a strided 3x3 convolution stem,
    five stages produced by ``self._make_layer``, a 1x1 expansion
    convolution, and a classifier head (global pooling, optional dropout,
    linear layer). The input/output channel count of every stage is
    recorded in ``self.model_conf_dict``.

    Args:
        opts: Options object. The following attributes are read with
            ``getattr`` (default in parentheses):
            ``model.classification.n_classes`` (1000),
            ``model.classification.classifier_dropout`` (0.2) and
            ``model.layer.global_pool`` ("mean"). It is also forwarded to
            ``get_configuration``, the layer constructors and
            ``reset_parameters``.
        *args: Unused here.
        **kwargs: Only ``output_stride`` is consulted. A value of 8 sets the
            ``dilate`` flag for both layer4 and layer5; a value of 16 sets
            it for layer5 only; any other value (including absence) leaves
            both flags ``False``.
    """
    n_classes = getattr(opts, "model.classification.n_classes", 1000)
    drop_prob = getattr(opts, "model.classification.classifier_dropout", 0.2)
    pooling = getattr(opts, "model.layer.global_pool", "mean")
    stem_in, stem_out = 3, 16
    net_cfg = get_configuration(opts=opts)

    # Segmentation architectures such as DeepLab and PSPNet modify the
    # strides of classification backbones; the `output_stride` keyword
    # argument lets callers request that here.
    requested_stride = kwargs.get("output_stride", None)
    dilate_l4 = bool(requested_stride == 8)
    dilate_l5 = dilate_l4 or bool(requested_stride == 16)

    super(MobileViT, self).__init__()
    self.dilation = 1

    # Per-stage channel bookkeeping: {stage name: {'in': ..., 'out': ...}}.
    self.model_conf_dict = {}

    # Stem convolution.
    self.conv_1 = ConvLayer(
        opts=opts,
        in_channels=stem_in,
        out_channels=stem_out,
        kernel_size=3,
        stride=2,
        use_norm=True,
        use_act=True,
    )
    self.model_conf_dict['conv1'] = {'in': stem_in, 'out': stem_out}

    # (attribute name, configuration key, extra keyword arguments).
    # Only the last two stages receive an explicit `dilate` flag; the first
    # three rely on whatever default `_make_layer` declares.
    stage_table = (
        ("layer_1", "layer1", {}),
        ("layer_2", "layer2", {}),
        ("layer_3", "layer3", {}),
        ("layer_4", "layer4", {"dilate": dilate_l4}),
        ("layer_5", "layer5", {"dilate": dilate_l5}),
    )
    width = stem_out
    for attr_name, cfg_key, extra in stage_table:
        stage, next_width = self._make_layer(
            opts=opts, input_channel=width, cfg=net_cfg[cfg_key], **extra
        )
        setattr(self, attr_name, stage)
        self.model_conf_dict[cfg_key] = {'in': width, 'out': next_width}
        width = next_width

    # 1x1 expansion before the classifier; its width is capped at 960.
    exp_channels = min(net_cfg["last_layer_exp_factor"] * width, 960)
    self.conv_1x1_exp = ConvLayer(
        opts=opts,
        in_channels=width,
        out_channels=exp_channels,
        kernel_size=1,
        stride=1,
        use_act=True,
        use_norm=True,
    )
    self.model_conf_dict['exp_before_cls'] = {'in': width, 'out': exp_channels}

    # Classifier head: pooling -> (dropout) -> fully connected layer.
    head = nn.Sequential()
    head.add_module(
        name="global_pool",
        module=GlobalPool(pool_type=pooling, keep_dim=False),
    )
    # Dropout is only inserted for probabilities strictly between 0 and 1.
    if 0.0 < drop_prob < 1.0:
        head.add_module(
            name="dropout",
            module=Dropout(p=drop_prob, inplace=True),
        )
    head.add_module(
        name="fc",
        module=LinearLayer(
            in_features=exp_channels, out_features=n_classes, bias=True
        ),
    )
    self.classifier = head

    # check model
    self.check_model()

    # weight initialization
    self.reset_parameters(opts=opts)