models/attentive_nas_dynamic_model.py (AttentiveNasDynamicModel.__init__)
def __init__(self, supernet, n_classes=1000, bn_param=(0., 1e-5)):
    super(AttentiveNasDynamicModel, self).__init__()

    self.supernet = supernet
    self.n_classes = n_classes
    self.use_v3_head = getattr(self.supernet, 'use_v3_head', False)
    self.stage_names = ['first_conv', 'mb1', 'mb2', 'mb3', 'mb4', 'mb5', 'mb6', 'mb7', 'last_conv']

    self.width_list, self.depth_list, self.ks_list, self.expand_ratio_list = [], [], [], []
    for name in self.stage_names:
        block_cfg = getattr(self.supernet, name)
        self.width_list.append(block_cfg.c)
        if name.startswith('mb'):
            self.depth_list.append(block_cfg.d)
            self.ks_list.append(block_cfg.k)
            self.expand_ratio_list.append(block_cfg.t)
    self.resolution_list = self.supernet.resolutions
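    # Candidate choices of the search space, keyed by dimension; width has one
    # entry per stage, depth/kernel_size/expand_ratio one entry per MB stage.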
    self.cfg_candidates = {
        'resolution': self.resolution_list,
        'width': self.width_list,
        'depth': self.depth_list,
        'kernel_size': self.ks_list,
        'expand_ratio': self.expand_ratio_list,
    }
    # first conv layer (conv + bn + act)
    out_channel_list, act_func, stride = \
        self.supernet.first_conv.c, self.supernet.first_conv.act_func, self.supernet.first_conv.s
    self.first_conv = DynamicConvBnActLayer(
        in_channel_list=int2list(3), out_channel_list=out_channel_list,
        kernel_size=3, stride=stride, act_func=act_func,
    )
    # inverted residual blocks
    self.block_group_info = []
    blocks = []
    _block_index = 0
    feature_dim = out_channel_list
    for stage_id, key in enumerate(self.stage_names[1:-1]):
        block_cfg = getattr(self.supernet, key)
        width = block_cfg.c
        n_block = max(block_cfg.d)  # build the deepest variant; depth is shrunk at runtime
        act_func = block_cfg.act_func
        ks = block_cfg.k
        expand_ratio_list = block_cfg.t
        use_se = block_cfg.se

        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            # only the first block of a stage may downsample
            stride = block_cfg.s if i == 0 else 1
            if min(expand_ratio_list) >= 4:
                # for the first block of a stage, restrict the expand-ratio choices to values >= 4
                expand_ratio_list = [_s for _s in expand_ratio_list if _s >= 4] if i == 0 else expand_ratio_list
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=feature_dim,
                out_channel_list=output_channel,
                kernel_size_list=ks,
                expand_ratio_list=expand_ratio_list,
                stride=stride,
                act_func=act_func,
                use_se=use_se,
                channels_per_group=getattr(self.supernet, 'channels_per_group', 1),
            )
            shortcut = DynamicShortcutLayer(feature_dim, output_channel, reduction=stride)
            blocks.append(MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            feature_dim = output_channel
    self.blocks = nn.ModuleList(blocks)
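    # block_group_info[stage_id] lists the indices in self.blocks belonging to that stage;
    # runtime_depth (set below) tracks how many of those blocks are active in the current subnet.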
    # last conv layer(s): a plain 1x1 conv-bn-act, or a MobileNetV3-style head
    last_channel, act_func = self.supernet.last_conv.c, self.supernet.last_conv.act_func
    if not self.use_v3_head:
        self.last_conv = DynamicConvBnActLayer(
            in_channel_list=feature_dim, out_channel_list=last_channel,
            kernel_size=1, act_func=act_func,
        )
    else:
        expand_feature_dim = [f_dim * 6 for f_dim in feature_dim]
        self.last_conv = nn.Sequential(collections.OrderedDict([
            ('final_expand_layer', DynamicConvBnActLayer(
                feature_dim, expand_feature_dim, kernel_size=1, use_bn=True, act_func=act_func)),
            ('pool', nn.AdaptiveAvgPool2d((1, 1))),
            ('feature_mix_layer', DynamicConvBnActLayer(
                in_channel_list=expand_feature_dim, out_channel_list=last_channel,
                kernel_size=1, act_func=act_func, use_bn=False)),
        ]))
    # classifier head
    self.classifier = DynamicLinearLayer(
        in_features_list=last_channel, out_features=n_classes, bias=True
    )
    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth: number of active blocks per stage (initialized to the maximum depth)
    self.runtime_depth = [len(block_idx) for block_idx in self.block_group_info]

    self.zero_residual_block_bn_weights()

    self.active_dropout_rate = 0
    self.active_drop_connect_rate = 0
    self.active_resolution = 224
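For reference, below is a minimal sketch of the attribute-style supernet config this constructor expects. It is illustrative only: SimpleNamespace stands in for the project's config objects, the field names mirror the attribute accesses in __init__ above (c / d / k / t / s / act_func / se, plus resolutions, use_v3_head, channels_per_group), and the candidate values are made up rather than the ones shipped with AttentiveNAS.

# Hypothetical supernet config sketch; values are illustrative, not the project's.
from types import SimpleNamespace

def _stage(c, d, k, t, s, act_func='swish', se=False):
    # one per-stage block config carrying the fields read by the constructor
    return SimpleNamespace(c=c, d=d, k=k, t=t, s=s, act_func=act_func, se=se)

toy_supernet = SimpleNamespace(
    resolutions=[192, 224, 256],          # candidate input resolutions
    use_v3_head=True,                     # select the MobileNetV3-style head branch
    channels_per_group=1,
    first_conv=SimpleNamespace(c=[16, 24], act_func='swish', s=2),
    mb1=_stage(c=[16, 24], d=[1, 2], k=[3, 5], t=[1], s=1),
    mb2=_stage(c=[24, 32], d=[2, 3], k=[3, 5], t=[4, 5, 6], s=2),
    mb3=_stage(c=[32, 40], d=[2, 3], k=[3, 5], t=[4, 5, 6], s=2, se=True),
    mb4=_stage(c=[64, 72], d=[2, 3, 4], k=[3, 5], t=[4, 5, 6], s=2),
    mb5=_stage(c=[112, 120], d=[2, 3, 4], k=[3, 5], t=[4, 5, 6], s=1, se=True),
    mb6=_stage(c=[192, 200], d=[2, 3, 4], k=[3, 5], t=[6], s=2, se=True),
    mb7=_stage(c=[216, 224], d=[1, 2], k=[3, 5], t=[6], s=1, se=True),
    last_conv=SimpleNamespace(c=[1792, 1984], act_func='swish'),
)

# model = AttentiveNasDynamicModel(toy_supernet, n_classes=1000)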