models/cfg/esod/uavdt_yolov5m.yaml (39 lines of code) (raw):
# parameters
# NOTE(review): depth_multiple / width_multiple presumably scale the `number`
# column and the channel arguments of the layer rows below — confirm against
# the model parser, which is not part of this file.
nc: 1 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple
# anchors 1024
# NOTE(review): "1024" is presumably the input resolution these anchors were
# fitted for — confirm.
# One row per detection scale (P3, P4, P5). Each row presumably holds three
# (width, height) pairs flattened into six integers — confirm against Detect.
anchors:
  - [14, 16, 22, 26, 40, 21]  # P3/8
  - [37, 30, 24, 46, 56, 26]  # P4/16
  - [40, 48, 85, 59, 144, 114]  # P5/32
# YOLOv5 backbone
# Each row is one layer. The numbers in the trailing comments count rows from 0
# and keep counting through `head`; the commented-out ASPP row is not counted.
# `head` refers to backbone rows 7 and 9 by these numbers, so adding or removing
# a row here means renumbering those references.
# NOTE(review): the RF (presumably receptive-field) estimates below use 0.33 as
# the depth factor, while depth_multiple in this file is 0.67 — they look
# carried over from a shallower variant; re-derive before relying on them.
backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2 ~ RF=(2-1)*1+3=4
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 ~ RF=(4-1)*2+3=9
[-1, 3, C3, [128]], # 2 ~ RF=9+2*(3*0.33)=9+2=11 or 9+6=15
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8 ~ RF=(11-1)*2+3=23 or (15-1)*2+3=31
[-1, 9, C3, [256]], # 4 ~ RF=23+2*(9*0.33)=23+6=29 or 31+18=49
[-1, 1, SPP, [256, [5, 9, 13]]], # 5
# [-1, 1, ASPP, [256, [1, 2, 4, 6]]],
[[-1], 1, Segmenter, [1]], # 6
[[4, -1], 1, HeatMapParser, [256, 8, 0.3]], # 7 ~ inputs: row 4 and -1 (presumably the preceding row); row 4 was chosen over row 5
[-1, 1, Conv, [512, 3, 2]], # 8-P4/16
[-1, 9, C3, [512]], # 9
[-1, 1, Conv, [1024, 3, 2]], # 10-P5/32
[-1, 1, SPP, [1024, [3, 5, 7]]], # 11 ~ earlier note read "5,9,13" (presumably the kernel sizes this row replaced)
[-1, 3, C3, [1024, False]], # 12
]
# YOLOv5 head
# Rows continue the layer numbering started in `backbone` (first head row is 13).
# Integer entries other than -1 in the `from` column are those layer numbers.
# NOTE(review): `nc` and `anchors` in the Detect row presumably name the
# top-level keys of this file — confirm against the model parser.
head:
[[-1, 1, Conv, [512, 1, 1]], # 13
[-1, 1, nn.Upsample, [None, 2, 'nearest']], # 14
[[-1, 9], 1, Concat, [1]], # 15 ~ cat backbone P4 (row 9)
[-1, 3, C3, [512, False]], # 16
[-1, 1, Conv, [256, 1, 1]], # 17
[-1, 1, nn.Upsample, [None, 2, 'nearest']], # 18
[[-1, 7], 1, Concat, [1]], # 19 ~ cat backbone P3 (row 7, the HeatMapParser row)
[-1, 3, C3, [256, False]], # 20 (P3/8-small)
[-1, 1, Conv, [256, 3, 2]], # 21
[[-1, 17], 1, Concat, [1]], # 22 ~ cat head P4 (row 17)
[-1, 3, C3, [512, False]], # 23 (P4/16-medium)
[-1, 1, Conv, [512, 3, 2]], # 24
[[-1, 13], 1, Concat, [1]], # 25 ~ cat head P5 (row 13)
[-1, 3, C3, [1024, False]], # 26 (P5/32-large)
[[20, 23, 26], 1, Detect, [nc, anchors]], # 27 ~ Detect(P3, P4, P5)
]