models/cfg/vanilla/visdrone_retinanet.yaml (28 lines of code) (raw):
# Model-wide parameters
nc: 10  # number of classes

# Disabled alternative scaling, kept for reference:
# depth_multiple: 1.33  # model depth multiple
# width_multiple: 1.25  # layer channel multiple

# Disabled alternative anchor set, labelled "1920"
# (presumably the input resolution it was tuned for - confirm):
# anchors:
#   - [8,10, 10,22, 20,18]  # P3/8
#   - [23,40, 49,28, 41,59]  # P4/16
#   - [93,48, 81,93, 170,140]  # P5/32

depth_multiple: 1.00  # model depth multiple
width_multiple: 1.00  # layer channel multiple

# Active anchor set, labelled "1536" (presumably the input resolution - confirm).
# One row per detection level (P3, P4, P5); each row is a flat list of three
# value pairs (assumed to be width,height - verify against the Detect module).
anchors:
  - [6,8, 10,22, 19,15]  # P3/8
  - [19,32, 39,22, 32,47]  # P4/16
  - [74,38, 65,74, 136,112]  # P5/32
# Backbone: a 7x7 stride-2 conv + max-pool stem followed by four
# ResBlockLayer stages. (This is not the stock YOLOv5 backbone.)
# NOTE(review): the stage args look like [out_channels, stride, num_blocks],
# which with 3/4/6/3 blocks and 256..2048 channels would match a ResNet-50
# layout - confirm against the ResBlockLayer definition.
# The trailing "N-Pk/s" comments give the layer index N and the feature level /
# cumulative stride; head rows reference these indices (3, 4 and 5).
backbone:
  # Row format: [from, number, module, args]
  [
    # Conv args presumably follow [c2, k, s, p, g, act] - verify against Conv.
    [-1, 1, Conv, [64, 7, 2, None, 1, 'nn.ReLU()']],  # 0-P1/2
    [-1, 1, nn.MaxPool2d, [3, 2, 1]],  # 1-P2/4
    [-1, 1, ResBlockLayer, [256, 1, 3]],  # 2-P2/4
    [-1, 1, ResBlockLayer, [512, 2, 4]],  # 3-P3/8
    # Disabled experimental layers, kept for reference:
    # [-1, 1, DWConv, [256, 13, 1]],
    # [[-1], 1, Segmenter, [1]],
    # [[4, -1], 1, HeatMapParser, [256, 8, 0.5]], # 7
    [-1, 1, ResBlockLayer, [1024, 2, 6]],  # 4-P4/16
    [-1, 1, ResBlockLayer, [2048, 2, 3]],  # 5-P5/32
  ]
# Detection head: FPN-style top-down pathway over backbone layers 5, 4 and 3
# (1x1 lateral conv -> x2 upsample -> Add -> 3x3 output conv per level).
# This is not the stock YOLOv5 head.
# Layer indices continue from the backbone, so the first head row is layer 6.
# NOTE(review): negative `from` values are read as relative to the current row,
# which is what the existing index labels (7, 11, 15) imply - confirm against
# the model parser. `Add` is presumably an element-wise sum - verify.
head:
  # Row format: [from, number, module, args]
  [
    [5, 1, nn.Conv2d, [256, 1, 1, 0]],  # 6: lateral conv on backbone P5; args are [c2, k, s, p]
    [-1, 1, nn.Conv2d, [256, 3, 1, 1]],  # 7 (P5/32-large)
    # Upsample args presumably map to (size, scale_factor, mode) - confirm.
    [-2, 1, nn.Upsample, [None, 2, 'nearest']],  # 8: upsample of layer 6
    [4, 1, nn.Conv2d, [256, 1, 1, 0]],  # 9: load backbone P4
    [[-1, -2], 1, Add, []],  # 10: combine layers 9 and 8
    [-1, 1, nn.Conv2d, [256, 3, 1, 1]],  # 11 (P4/16-medium)
    [-2, 1, nn.Upsample, [None, 2, 'nearest']],  # 12: upsample of layer 10
    [3, 1, nn.Conv2d, [256, 1, 1, 0]],  # 13: load backbone P3
    [[-1, -2], 1, Add, []],  # 14: combine layers 13 and 12
    [-1, 1, nn.Conv2d, [256, 3, 1, 1]],  # 15 (P3/8-small)
    # `nc` and `anchors` refer to the top-level keys of this file; they are
    # presumably resolved by the model parser - confirm.
    [[15, 11, 7], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]