in models/yolo.py [0:0]
def forward(self, x):
    """Apply the per-level detection convs and, at inference, decode boxes.

    Args:
        x: One of
            * a list of per-level feature maps, each ``(bs, C, ny, nx)``;
            * a 2-tuple ``(features, offsets)``;
            * a 3-tuple ``(features, offsets, masks)``, where ``masks`` is a
              length-1 non-tensor sequence. ``masks[0]`` is forwarded to
              ``self.get_indices`` when ``offsets`` is given and
              ``self.sparse`` is set and truthy.
            ``offsets`` (may be ``None``) has one row per patch laid out as
            ``(bi, x1, y1, x2, y2)``; column 0 is the image index the patch
            belongs to.

    Returns:
        Training mode: ``x`` — the feature list, whose dense-path entries
        have been replaced in place by ``(bs, na, ny, nx, no)`` tensors
        (sparse-path entries are left untouched) — or
        ``(x, patch_offsets)`` when ``offsets`` was supplied.

        Inference mode: ``(pred, x)`` with ``x`` as above and ``pred`` the
        decoded predictions of all levels concatenated along dim 1. With
        ``offsets`` the predictions are regrouped per image and zero-padded
        to the largest per-image count of each level.

    Raises:
        AssertionError: if ``masks`` is not a length-1 non-tensor sequence,
            or if a sparse output is decoded without ``offsets``.
    """
    # x = x.copy()  # for profiling
    masks, offsets, indices_per_layer = None, None, None
    if isinstance(x, tuple):
        if len(x) == 2:
            x, offsets = x  # offsets(bi,x1,y1,x2,y2)
        else:
            x, offsets, masks = x
            assert len(masks) == 1 and not isinstance(masks, torch.Tensor)
            if offsets is not None and getattr(self, 'sparse', False):
                indices_per_layer = self.get_indices(offsets, masks[0])

    if offsets is not None:
        # Number of source images = largest image index among patches + 1.
        # NOTE(review): raises on an empty ``offsets`` (zero patches), as
        # the original did — confirm callers never pass that.
        img_bs = torch.max(offsets[:, 0]).int().item() + 1
    else:
        img_bs = x[0].shape[0]
    device = x[0].device

    z = []  # inference output
    patch_offsets = []
    for i in range(self.nl):
        # if len(x) > self.nl:
        #     hid_feat_i = F.max_pool2d(x[self.nl * 2 - 1 - i], kernel_size=8, stride=8, padding=0)  # note the reversed ordering here
        #     x[i] = torch.cat((x[i], hid_feat_i), dim=1)
        bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)

        patch_off_xy = None
        if offsets is not None:
            # Rescale patch coordinates to this level's resolution.
            r = (2 ** (i - 1)) if self.nl == 4 else 2 ** i
            patch_off = torch.cat((offsets[:, :1], offsets[:, 1:] / r), dim=1)  # TODO: from 4 to 32
            patch_off_xy = patch_off[:, 1:3].view(-1, 1, 1, 1, 2)
            patch_offsets.append(patch_off)

        if indices_per_layer is not None:
            sp_x = self.m[i](x[i], indices_per_layer[i])  # sparse conv
            # x[i] = sp_x.dense(channels_first=True)  deprecated  # for training
        else:
            sp_x = None
            x[i] = self.m[i](x[i])  # conv
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

        if self.training:
            continue

        # ---- inference: decode raw outputs into boxes ----
        if self.grid[i].shape[2:4] != (ny, nx) or self.onnx_dynamic:
            self.grid[i] = self._make_grid(nx, ny).to(device)

        if sp_x is not None:
            # Sparse output: one row of features per active location.
            y = sp_x.features.sigmoid().view(-1, self.na, self.no)
            patch_i, yi, xi = sp_x.indices.long().T
            assert offsets is not None
            grid_off = (self.grid[i][0, 0, yi, xi].view(-1, 1, 2)
                        + patch_off_xy[patch_i, ...].view(-1, 1, 2))
            anch_wh = self.anchor_grid[i].view(1, self.na, 2)
            # [num_patches, 5] --> [num_objects, 5], compatible for box concat
            batch_ind = offsets[patch_i, 0]
        else:
            y = x[i].sigmoid()
            anch_wh = self.anchor_grid[i].view(1, self.na, 1, 1, 2)
            if offsets is not None:
                grid_off = self.grid[i] + patch_off_xy
                batch_ind = offsets[:, 0]
            else:
                grid_off = self.grid[i]
                batch_ind = None

        if self.inplace:
            y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + grid_off) * self.stride[i]  # xy
            y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anch_wh  # wh
        else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
            xy = (y[..., 0:2] * 2. - 0.5 + grid_off) * self.stride[i]  # xy
            wh = (y[..., 2:4] * 2) ** 2 * anch_wh  # wh
            y = torch.cat((xy, wh, y[..., 4:]), -1)
        # y[..., 4] = 1.0

        if offsets is None:
            z.append(y.view(bs, -1, self.no))
            continue

        # Regroup patch-level predictions by source image, then zero-pad so
        # every image has the same number of rows and they can be stacked.
        # ``new_zeros`` keeps the padding on y's dtype/device; a plain
        # ``torch.zeros`` is float32 and would promote (or, on older
        # PyTorch, break) non-float32 predictions.
        pbox = []
        for img_i in range(img_bs):
            boxes_i = y[batch_ind == img_i]
            if len(boxes_i):
                pbox.append(boxes_i.view(-1, self.no))
            else:
                pbox.append(y.new_zeros((0, self.no)))
        max_pnum = max(len(boxes) for boxes in pbox)
        z.append(torch.stack([
            torch.cat((boxes, boxes.new_zeros((max_pnum - len(boxes), self.no))))
            for boxes in pbox
        ]))

    if offsets is not None:
        x = (x, patch_offsets)

    return x if self.training else (torch.cat(z, 1), x)