in easycv/models/detection/detectors/yolox/yolo_pafpn.py [0:0]
def __init__(self,
             depth=1.0,
             width=1.0,
             backbone='CSPDarknet',
             neck_type='yolo',
             neck_mode='all',
             in_features=('dark3', 'dark4', 'dark5'),
             in_channels=[256, 512, 1024],
             depthwise=False,
             act='silu',
             use_att=None,
             asff_channel=2,
             expand_kernel=3):
    """Build the backbone, the PAN-style neck and the optional ASFF modules.

    Args:
        depth: Depth multiplier. Forwarded to the backbone; ``round(3 * depth)``
            is passed as the third positional argument of every
            ``CSPLayer`` / ``VoVGSCSP`` (presumably the block count).
        width: Channel multiplier. Forwarded to the backbone and to ``ASFF``
            (as ``multiplier``); every neck channel count is
            ``int(in_channels[i] * width)``.
        backbone: ``'CSPDarknet'`` or ``'RepVGGYOLOX'``. Any other value logs
            a warning and falls back to ``RepVGGYOLOX``. The value is stored
            unmodified in ``self.backbone_name``.
        neck_type: ``'yolo'`` or ``'gsconv'``. Any other value logs a warning
            and the ``'yolo'`` neck is built.
        neck_mode: Only consulted for the ``'gsconv'`` neck. ``'all'`` builds
            ``VoVGSCSP`` fusion blocks (plus ``gsconv3``); any other value
            builds ``CSPLayer`` fusion blocks instead.
        in_features: Stored in ``self.in_features``; not otherwise used here.
        in_channels: Base channel counts; indices 0, 1 and 2 are read.
        depthwise: Forwarded to ``CSPDarknet`` and ``CSPLayer``; also selects
            ``DWConv`` over ``BaseConv`` for the two stride-2 convolutions of
            the ``'yolo'`` neck.
        act: Activation name forwarded to the convolution / CSP / ASFF
            building blocks.
        use_att: ``None``, ``'ASFF'`` or ``'ASFF_sim'``. Any other value logs
            a warning and no ASFF module is built.
        asff_channel: Forwarded to ``ASFF``.
        expand_kernel: Forwarded to ``ASFF``.
    """
    super().__init__()

    # build backbone
    if backbone == 'CSPDarknet':
        self.backbone = CSPDarknet(
            depth, width, depthwise=depthwise, act=act)
    else:
        if backbone != 'RepVGGYOLOX':
            logging.warning(
                'YOLOX-PAI backbone must in [CSPDarknet, RepVGGYOLOX], otherwise we use RepVGGYOLOX as default'
            )
        self.backbone = RepVGGYOLOX(
            in_channels=3, depth=depth, width=width)
    self.backbone_name = backbone

    # build neck
    self.in_features = in_features
    # Store a copy: the default is a single list object shared by every call,
    # so aliasing it would let one instance's edits leak into all others.
    self.in_channels = list(in_channels)
    self.neck_type = neck_type
    self.neck_mode = neck_mode

    # Scaled channel widths of the three pyramid levels.
    ch0 = int(in_channels[0] * width)
    ch1 = int(in_channels[1] * width)
    ch2 = int(in_channels[2] * width)
    # Input widths of the fusion blocks (the original marks them "cat").
    # Kept as int(2 * c * width) rather than 2 * int(c * width): the two
    # can differ by one after float truncation.
    cat0 = int(2 * in_channels[0] * width)
    cat1 = int(2 * in_channels[1] * width)
    num_blocks = round(3 * depth)

    def csp_layer(c_in, c_out):
        # All CSPLayer fusion blocks share everything but their channel sizes.
        return CSPLayer(
            c_in, c_out, num_blocks, False, depthwise=depthwise, act=act)

    # NOTE: submodules are created in the same order as before so that
    # parameter ordering and seeded weight initialisation are unchanged.
    if neck_type != 'gsconv':
        if neck_type != 'yolo':
            logging.warning(
                'YOLOX-PAI neck_type must in [yolo, gsconv], otherwise we use yolo as default'
            )
            self.neck_type = 'yolo'

        Conv = DWConv if depthwise else BaseConv
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.lateral_conv0 = BaseConv(ch2, ch1, 1, 1, act=act)
        self.C3_p4 = csp_layer(cat1, ch1)  # cat
        self.reduce_conv1 = BaseConv(ch1, ch0, 1, 1, act=act)
        self.C3_p3 = csp_layer(cat0, ch0)
        # bottom-up conv
        self.bu_conv2 = Conv(ch0, ch0, 3, 2, act=act)
        self.C3_n3 = csp_layer(cat0, ch1)
        # bottom-up conv
        self.bu_conv1 = Conv(ch1, ch1, 3, 2, act=act)
        self.C3_n4 = csp_layer(cat1, ch2)
    else:
        # gsconv neck: GSConv replaces the lateral / bottom-up convolutions.
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.gsconv1 = GSConv(ch2, ch1, 1, 1, act=act)
        self.gsconv2 = GSConv(ch1, ch0, 1, 1, act=act)
        self.gsconv4 = GSConv(ch0, ch0, 3, 2, act=act)
        self.gsconv5 = GSConv(ch1, ch1, 3, 2, act=act)

        if neck_mode == 'all':
            # Fusion blocks are VoVGSCSP as well.
            self.vovGSCSP1 = VoVGSCSP(cat1, ch1, num_blocks, False)
            self.gsconv3 = GSConv(cat0, cat0, 1, 1, act=act)
            self.vovGSCSP2 = VoVGSCSP(cat0, ch0, num_blocks, False)
            self.vovGSCSP3 = VoVGSCSP(cat0, ch1, num_blocks, False)
            self.vovGSCSP4 = VoVGSCSP(cat1, ch2, num_blocks, False)
        else:
            # Keep the regular CSPLayer fusion blocks.
            self.C3_p4 = csp_layer(cat1, ch1)  # cat
            self.C3_p3 = csp_layer(cat0, ch0)
            self.C3_n3 = csp_layer(cat0, ch1)
            self.C3_n4 = csp_layer(cat1, ch2)

    # build attention after PAN
    self.use_att = use_att
    default_attention_list = ['ASFF', 'ASFF_sim']
    if use_att is not None and use_att not in default_attention_list:
        # The value is kept as given and no ASFF module is created below,
        # so the message must not promise an ASFF fallback.
        logging.warning(
            'YOLOX-PAI use_att must in [ASFF, ASFF_sim], otherwise no ASFF attention is built'
        )

    if use_att in default_attention_list:
        asff_kwargs = dict(
            type=use_att,
            asff_channel=asff_channel,
            expand_kernel=expand_kernel,
            multiplier=width,
            act=act,
        )
        # One ASFF module per pyramid level.
        self.asff_1 = ASFF(level=0, **asff_kwargs)
        self.asff_2 = ASFF(level=1, **asff_kwargs)
        self.asff_3 = ASFF(level=2, **asff_kwargs)