in easycv/models/backbones/conv_vitdet.py [0:0]
def forward(self, images):
    """Run the backbone and return four feature maps.

    The first two stages apply a patch embedding followed by their block
    lists. The third stage operates on a flattened token sequence and
    alternates between blocks that see the full token grid (called with
    ``self.global_rel_pos_bias()``) and runs of blocks that see
    non-overlapping ``window_size x window_size`` windows (called with
    ``self.windowed_rel_pos_bias()``).

    Args:
        images: Input passed straight to ``self.patch_embed1``.
            Presumably an image batch of shape (B, C, H, W) -- confirm
            against the caller.

    Returns:
        list: ``[s0, s1, s2, s3]`` where ``s0`` is the output of
        ``blocks1``, ``s1`` the output of ``blocks2``, ``s2`` the stage-3
        tokens rearranged to ``b c h w``, and ``s3`` the result of
        ``self.ms_adaptor[-1]`` applied to ``s2``.

    Note:
        ``self.blocks3`` must hold at least 11 blocks (index 10 is used).
        The window partition presumably requires both entries of
        ``self.patch_embed3.grid_size`` to be divisible by
        ``self.window_size`` -- otherwise ``rearrange`` cannot split the
        axes.
    """
    # Stage 1.
    x = self.patch_embed1(images)
    x = self.pos_drop(x)
    for blk in self.blocks1:
        x = blk(x)
    s0 = x

    # Stage 2.
    x = self.patch_embed2(x)
    for blk in self.blocks2:
        x = blk(x)
    s1 = x

    # Stage 3: embed, flatten everything from dim 2 onwards into a single
    # token axis and move it in front of the channel axis.
    x = self.patch_embed3(x)
    x = x.flatten(2).permute(0, 2, 1)
    x = self.patch_embed4(x)
    x = x + self.pos_embed

    # Read the grid/window geometry once, after patch_embed3 has run.
    grid_h = self.patch_embed3.grid_size[0]
    grid_w = self.patch_embed3.grid_size[1]
    win = self.window_size

    # Block 0 runs on the full grid. Each (start, stop) entry then runs
    # blocks3[start:stop] on windows, followed by blocks3[stop] on the
    # full grid again.
    windowed_runs = ((1, 3), (4, 6), (7, 10))

    x = self.blocks3[0](x, rel_pos_bias=self.global_rel_pos_bias())
    for start, stop in windowed_runs:
        # Partition: token sequence -> 2D grid -> batch of windows.
        x = rearrange(x, 'b (h w) c -> b h w c', h=grid_h, w=grid_w)
        x = rearrange(
            x,
            'b (h h1) (w w1) c -> (b h w) (h1 w1) c',
            h1=win,
            w1=win,
        )
        for blk in self.blocks3[start:stop]:
            x = blk(x, rel_pos_bias=self.windowed_rel_pos_bias())
        # Merge: batch of windows -> full token sequence.
        x = rearrange(
            x,
            '(b h w) (h1 w1) c -> b (h h1 w w1) c',
            h=grid_h // win,
            w=grid_w // win,
            h1=win,
            w1=win,
        )
        x = self.blocks3[stop](x, rel_pos_bias=self.global_rel_pos_bias())

    # Back to a channel-first 2D map for the output.
    s2 = rearrange(x, 'b (h w) c -> b c h w', h=grid_h, w=grid_w)
    s3 = self.ms_adaptor[-1](s2)
    return [s0, s1, s2, s3]