def forward()

in models/yolo.py [0:0]


    def forward(self, x):
        """Apply the per-level detection convs and, at inference, decode boxes.

        Args:
            x: Either a list of per-level feature maps, or a tuple
                ``(feats, offsets)`` / ``(feats, offsets, masks)``.
                ``offsets`` holds one row per patch laid out as
                ``(bi, x1, y1, x2, y2)``; column 0 is used as the index of
                the image the patch belongs to. ``masks`` must be a
                non-tensor sequence of length 1 and is forwarded to
                ``self.get_indices`` when ``self.sparse`` exists and is
                truthy.

        Returns:
            In training mode, ``x`` (each level replaced by the conv output
            reshaped to ``(bs, na, ny, nx, no)`` on the dense path; the
            sparse path leaves ``x[i]`` untouched), wrapped as
            ``(x, patch_offsets)`` when offsets were supplied.
            In inference mode, ``(predictions, x)`` where ``predictions`` is
            the per-level decoded boxes concatenated along dim 1. When
            offsets were supplied, boxes are regrouped per image and
            zero-padded so every image has the same number of rows.

        Note:
            The input list ``x`` is modified in place.
        """
        # x = x.copy()  # for profiling
        masks, offsets, indices_per_layer = None, None, None
        if isinstance(x, tuple):
            if len(x) == 2:
                x, offsets = x  # offsets(bi,x1,y1,x2,y2)
            else:
                x, offsets, masks = x
                assert len(masks) == 1 and not isinstance(masks, torch.Tensor)
                if offsets is not None and getattr(self, 'sparse', False):
                    indices_per_layer = self.get_indices(offsets, masks[0])

        # The image count is only needed to regroup patch predictions per image.
        img_bs = None
        if offsets is not None:
            img_bs = torch.max(offsets[:, 0]).int().item() + 1

        device = x[0].device
        z = []  # inference output
        patch_offsets = []
        for i in range(self.nl):
            # if len(x) > self.nl:
            #     hid_feat_i = F.max_pool2d(x[self.nl * 2 - 1 - i], kernel_size=8, stride=8, padding=0)  # note: the hidden features are indexed in reverse order
            #     x[i] = torch.cat((x[i], hid_feat_i), dim=1)

            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            patch_off_xy = None
            if offsets is not None:
                # Rescale the patch coordinates to this level; the image-index column is kept as is.
                r = (2 ** (i - 1)) if self.nl == 4 else 2 ** i
                patch_off = torch.cat((offsets[:, :1], offsets[:, 1:] / r), dim=1)  # TODO: from 4 to 32
                patch_off_xy = patch_off[:, 1:3].view(-1, 1, 1, 1, 2)
                patch_offsets.append(patch_off)

            if indices_per_layer is not None:
                sp_x = self.m[i](x[i], indices_per_layer[i])  # sparse conv
                # x[i] = sp_x.dense(channels_first=True) deprecated # for training
            else:
                sp_x = None
                x[i] = self.m[i](x[i])  # conv
                x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if self.training:
                continue

            # ---- inference: decode raw outputs into boxes ----
            if self.grid[i].shape[2:4] != (ny, nx) or self.onnx_dynamic:
                self.grid[i] = self._make_grid(nx, ny).to(device)

            if sp_x is not None:
                # Sparse output: one row per active location, addressed by (patch, y, x).
                y = sp_x.features.sigmoid().view(-1, self.na, self.no)
                patch_idx, yi, xi = sp_x.indices.long().T
                assert offsets is not None
                grid_off = (self.grid[i][0, 0, yi, xi].view(-1, 1, 2)
                            + patch_off_xy[patch_idx, ...].view(-1, 1, 2))
                anch_wh = self.anchor_grid[i].view(1, self.na, 2)
                batch_ind = offsets[patch_idx, 0]  # [num_patches, 5] --> [num_objects, 5], compatible for box concat
            else:
                y = x[i].sigmoid()
                anch_wh = self.anchor_grid[i].view(1, self.na, 1, 1, 2)
                if offsets is not None:
                    grid_off = self.grid[i] + patch_off_xy
                    batch_ind = offsets[:, 0]
                else:
                    grid_off = self.grid[i]
                    batch_ind = None

            if self.inplace:
                y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + grid_off) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anch_wh  # wh
            else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                xy = (y[..., 0:2] * 2. - 0.5 + grid_off) * self.stride[i]  # xy
                wh = (y[..., 2:4] * 2) ** 2 * anch_wh  # wh
                y = torch.cat((xy, wh, y[..., 4:]), -1)
            # y[..., 4] = 1.0

            if offsets is None:
                z.append(y.view(bs, -1, self.no))
                continue

            # Regroup predictions by source image. reshape() also copes with
            # images that received no patch (zero rows).
            per_image = [y[batch_ind == img_idx].reshape(-1, self.no) for img_idx in range(img_bs)]
            max_pnum = max(len(boxes) for boxes in per_image)
            # Pad with zeros taken from y so dtype and device match the
            # predictions (plain torch.zeros would always be float32).
            z.append(torch.stack([
                torch.cat((boxes, y.new_zeros((max_pnum - len(boxes), self.no))))
                for boxes in per_image
            ]))

        if offsets is not None:
            x = (x, patch_offsets)
        return x if self.training else (torch.cat(z, 1), x)