# c3dm/hypercolumnet.py
import torch
import torchvision

# auto_init_args stores every constructor argument as an attribute on self
# (e.g. self.architecture, self.smooth below); the import path follows the
# layout of the c3dm repository.
from tools.utils import auto_init_args


# Imports and the class wrapper (name inferred from the file name) are
# restored here so the snippet is self-contained; only __init__ is shown.
class HyperColumNet(torch.nn.Module):
    def __init__(self,
                 trunk_arch='resnet50',
                 n_upsample=2,
                 hc_layers=(1, 2, 3, 4),  # immutable default instead of a list
                 hcdim=512,
                 pose_confidence=True,
                 depth_offset=0.,
                 smooth=False,
                 encode_input_keypoints=False,
                 kp_encoding_sig=1.,
                 dimout=1,
                 dimout_glob=0,
                 dimout_glob_alpha=0,
                 n_keypoints=12,
                 architecture='hypercolumns',
                 dilate_start=2,
                 glob_inst_norm=False,
                 final_std=0.01,
                 final_bias=-1.,
                 glob_activation=True,
                 pretrained=True):
        super().__init__()
        auto_init_args(self)
        # ImageNet-pretrained backbone; the stem below (conv1 .. maxpool)
        # downsamples the input by a factor of 4.
        trunk = getattr(torchvision.models, trunk_arch)(pretrained=pretrained)
        self.layer0 = torch.nn.Sequential(trunk.conv1,
                                          trunk.bn1,
                                          trunk.relu,
                                          trunk.maxpool)
        if self.architecture == 'hypercolumns':
            # Expose the four residual stages individually so their
            # intermediate activations can be tapped as hypercolumn features.
            for l in [1, 2, 3, 4]:
                lname = 'layer%d' % l
                setattr(self, lname, getattr(trunk, lname))
            # One prediction head per tapped stage.
            for hcl in hc_layers:
                lname = 'hc_layer%d' % hcl
                # Channel width of stage hcl, read off its last block.
                indim = getattr(trunk, 'layer%d' % hcl)[-1].conv1.in_channels
                if self.encode_input_keypoints:
                    # Keypoint-encoding maps are concatenated to the stage
                    # features, so each head takes n_keypoints extra channels.
                    indim += self.n_keypoints
                if not self.smooth:
                    layer_ = torch.nn.Sequential(
                        torch.nn.Conv2d(indim, hcdim, 3, bias=True, padding=1),
                        torch.nn.BatchNorm2d(hcdim),
                        torch.nn.ReLU(),
                        torch.nn.Conv2d(hcdim, hcdim, 3, bias=True, padding=1),
                    )
                else:
                    # 'smooth' variant: a single conv per head.
                    layer_ = torch.nn.Sequential(
                        torch.nn.Conv2d(indim, hcdim, 3, bias=True, padding=1),
                    )
                setattr(self, lname, layer_)
            # Output head applied to the fused hypercolumn features.
            if not self.smooth:
                up_layers = [torch.nn.Conv2d(hcdim, hcdim, 3, bias=True, padding=1),
                             torch.nn.BatchNorm2d(hcdim),
                             torch.nn.ReLU(),
                             torch.nn.Conv2d(hcdim, dimout, 3, bias=True, padding=1)]
            else:
                up_layers = [torch.nn.Conv2d(hcdim, dimout, 3, bias=True, padding=1)]
            # Re-initialize the last conv with a small weight std; the bias
            # is overwritten only when final_bias is above the -1. sentinel.
            llayer = up_layers[-1]
            llayer.weight.data = llayer.weight.data.normal_(0., self.final_std)
            if self.final_bias > -1.:
                llayer.bias.data = llayer.bias.data.fill_(self.final_bias)
            print('hcnet: final bias = %1.2e, final std=%1.2e' %
                  (llayer.bias.data.mean(), llayer.weight.data.std()))
            self.final = torch.nn.Sequential(*up_layers)
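            # (Assumption, since the forward pass is not part of this
            # extract: each hc_layer head is presumably applied to its
            # stage's activations, the per-stage outputs are brought to a
            # common resolution (cf. n_upsample) and fused, and self.final
            # then maps the fused hcdim features to dimout channels.)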
        elif self.architecture == 'dilated':
            if self.dimout_glob > 0:
                raise NotImplementedError('not done yet')
            if self.encode_input_keypoints:
                # Widen the stem conv so the trunk accepts n_keypoints extra
                # input channels; the new filters are initialized near zero
                # so the pretrained RGB response is preserved.
                c1 = self.layer0[0]
                wsz = list(c1.weight.data.shape)
                wsz[1] = self.n_keypoints
                c1_add = c1.weight.data.new_zeros(wsz).normal_(0., 0.0001)
                c1.weight.data = torch.cat((c1.weight.data, c1_add), dim=1)
                c1.in_channels += self.n_keypoints
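            # (Assumption: the forward input is then presumably assembled as
            #     x = torch.cat((images, kp_maps), dim=1)  # (B, 3+n_keypoints, H, W)
            # which matches the widened stem conv above.)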
            layers = [self.layer0]
            li = 0
            for l in [1, 2, 3, 4]:
                lname = 'layer%d' % l
                m = getattr(trunk, lname)
                if l >= self.dilate_start:
                    # Replace striding with dilation from dilate_start on,
                    # so these stages keep the full spatial resolution.
                    for mm in m.modules():
                        if isinstance(mm, torch.nn.Conv2d):
                            mm.stride = (1, 1)
                            if mm.kernel_size == (3, 3):
                                # Dilation grows per converted stage:
                                # li = 0, 1, 2 -> dil = 4, 9, 16.
                                dil = (li + 2) ** 2
                                mm.dilation = (dil, dil)
                                mm.padding = (dil, dil)
                    li += 1
                layers.append(m)
            # Channel width of the deepest stage: the last module in its
            # final block that defines out_channels wins.
            for m in layers[-1][-1].modules():
                if hasattr(m, 'out_channels'):
                    lastdim = m.out_channels
            if True:  # deconv final layer (2x higher output resolution)
                layers.append(torch.nn.ConvTranspose2d(
                    lastdim, dimout, kernel_size=3,
                    stride=2, output_padding=1, padding=1, bias=True))
            else:  # classic conv, keeps the trunk resolution
                layers.append(torch.nn.Conv2d(
                    lastdim, dimout, kernel_size=3,
                    stride=1, padding=1, bias=True))
            layers[-1].weight.data = \
                layers[-1].weight.data.normal_(0., self.final_std)
            self.trunk = torch.nn.Sequential(*layers)
        # ImageNet RGB normalization statistics for input pre-processing.
        self.mean = torch.FloatTensor([0.485, 0.456, 0.406])
        self.std = torch.FloatTensor([0.229, 0.224, 0.225])
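

# ----------------------------------------------------------------------------
# Minimal usage sketch (not from the original file). It exercises only the
# constructor shown above; the forward pass lives elsewhere in the class, so
# the 'dilated' trunk is called directly here. The expected output size
# follows from the stem's 4x downsampling and the final 2x deconv.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
    net = HyperColumNet(architecture='dilated', dimout=1,
                        pretrained=False)  # skip the weight download
    # Normalize a batch with the statistics stored on the module.
    images = torch.rand(2, 3, 224, 224)
    images = (images - net.mean.view(1, 3, 1, 1)) / net.std.view(1, 3, 1, 1)
    out = net.trunk(images)
    print(out.shape)  # expected: torch.Size([2, 1, 112, 112])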