def forward()

in easycv/models/backbones/conv_vitdet.py [0:0]


    def forward(self, images):
        """Run the backbone and return its four feature maps.

        The input goes through ``patch_embed1``/``blocks1`` and then
        ``patch_embed2``/``blocks2``; the output of each of those two
        stages is kept as a feature map. The third stage flattens the
        ``patch_embed3`` output into a ``b (h w) c`` token sequence and
        feeds it through ``blocks3``, alternating between:

        * blocks 0, 3, 6 and 10, which see the whole token sequence and
          receive ``self.global_rel_pos_bias()``;
        * blocks 1-2, 4-5 and 7-9, which see the tokens split into
          ``window_size`` x ``window_size`` windows (windows folded into
          the batch axis) and receive ``self.windowed_rel_pos_bias()``.

        Args:
            images: Input passed to ``patch_embed1``.
                # NOTE(review): presumably a (B, C, H, W) image batch —
                # confirm against the caller.

        Returns:
            list: ``[s0, s1, s2, s3]`` where ``s0`` and ``s1`` are the
            outputs of the first two stages, ``s2`` is the ``blocks3``
            output rearranged to ``b c h w``, and ``s3`` is
            ``self.ms_adaptor[-1]`` applied to ``s2``.
        """
        # Stage 1.
        feat = self.pos_drop(self.patch_embed1(images))
        for layer in self.blocks1:
            feat = layer(feat)
        stage0 = feat

        # Stage 2.
        feat = self.patch_embed2(feat)
        for layer in self.blocks2:
            feat = layer(feat)
        stage1 = feat

        # Stage 3: turn the feature map into a token sequence.
        tokens = self.patch_embed3(feat)
        tokens = tokens.flatten(2).permute(0, 2, 1)
        tokens = self.patch_embed4(tokens) + self.pos_embed

        tokens = self.blocks3[0](
            tokens, rel_pos_bias=self.global_rel_pos_bias())

        # Read after patch_embed3 has run, as the original code did.
        grid_h = self.patch_embed3.grid_size[0]
        grid_w = self.patch_embed3.grid_size[1]
        win = self.window_size

        # Each pair is the [start, stop) slice of windowed blocks; the
        # block at index ``stop`` is the full-sequence block that closes
        # the group.
        for start, stop in ((1, 3), (4, 6), (7, 10)):
            # Restore the 2-D layout, then cut it into windows that are
            # stacked along the batch axis.
            grid = rearrange(
                tokens, 'b (h w) c -> b h w c', h=grid_h, w=grid_w)
            windows = rearrange(
                grid,
                'b (h h1) (w w1) c -> (b h w) (h1 w1) c',
                h1=win,
                w1=win,
            )
            for layer in self.blocks3[start:stop]:
                windows = layer(
                    windows, rel_pos_bias=self.windowed_rel_pos_bias())

            # Stitch the windows back into one sequence per sample; the
            # (h h1 w w1) order equals the row-major order of the grid.
            tokens = rearrange(
                windows,
                '(b h w) (h1 w1) c -> b (h h1 w w1) c',
                h=grid_h // win,
                w=grid_w // win,
                h1=win,
                w1=win,
            )
            tokens = self.blocks3[stop](
                tokens, rel_pos_bias=self.global_rel_pos_bias())

        # Channels-first map for the third output and the adaptor.
        stage2 = rearrange(
            tokens, 'b (h w) c -> b c h w', h=grid_h, w=grid_w)
        stage3 = self.ms_adaptor[-1](stage2)

        return [stage0, stage1, stage2, stage3]