in python/singa/layer.py [0:0]
def initialize(self, x):
    """Lazily create W, b and the convolution handle from the shape,
    device and dtype of the first input tensor x."""
    self.in_channels = x.shape[1]
    # weight shape: (out_channels, in_channels per group, kernel_h, kernel_w)
    w_shape = (
        self.nb_kernels,
        int(self.in_channels / self.group),
        self.kernel_size[0],
        self.kernel_size[1],
    )
    self.W = Tensor(shape=w_shape,
                    requires_grad=True,
                    stores_grad=True,
                    device=x.device)
    # the previous formula used the full in_channels rather than the
    # per-group fan-in (w_shape[1] == in_channels // group):
    # std = math.sqrt(
    #     2.0 / (self.in_channels * self.kernel_size[0] * self.kernel_size[1] +
    #            self.nb_kernels))
    std = math.sqrt(
        2.0 / (w_shape[1] * self.kernel_size[0] * self.kernel_size[1] +
               self.nb_kernels))
    self.W.gaussian(0.0, std)
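    # bias: one scalar per output channel, zero-initialized.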
    if self.bias:
        b_shape = (self.nb_kernels,)
        self.b = Tensor(shape=b_shape,
                        requires_grad=True,
                        stores_grad=True,
                        device=x.device)
        self.b.set_value(0.0)
    else:
        # keep self.b defined so forward() can handle the bias-free
        # case consistently.
        self.b = None
        # Tensor(data=CTensor([]), requires_grad=False, stores_grad=False)
    # for SAME_UPPER / SAME_LOWER pad modes, recompute the padding from the
    # actual input spatial size; odd_padding carries any asymmetric remainder.
    if self.pad_mode in ("SAME_UPPER", "SAME_LOWER"):
        self.padding, self.odd_padding = utils.get_padding_shape(
            self.pad_mode, x.shape[2:], self.kernel_size, self.stride)
        self.padding = [self.padding[0], self.padding[2]]
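    # when the padding is asymmetric, build a zero-filled dummy input with the
    # enlarged spatial dims so that the handle below sees the padded shape.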
    _x = x
    if self.odd_padding != (0, 0, 0, 0):
        x_shape = list(x.data.shape())
        x_shape[2] += (self.odd_padding[0] + self.odd_padding[1])
        x_shape[3] += (self.odd_padding[2] + self.odd_padding[3])
        _x = Tensor(shape=x_shape, device=x.device)
        _x.set_value(0.0)
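    # device id -1 denotes the host (CPU) device: use the plain ConvHandle
    # there and the cuDNN handle on GPUs.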
    if _x.device.id() == -1:
        if self.group != 1:
            raise ValueError("group convolution is not implemented on CPU")
        else:
            if not hasattr(self, "handle"):
                self.handle = singa.ConvHandle(
                    _x.data,
                    self.kernel_size,
                    self.stride,
                    self.padding,
                    self.in_channels,
                    self.nb_kernels,
                    self.bias,
                    self.group,
                )
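    # GPU path: build a cuDNN convolution handle once and cache it on the layer.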
    else:
        if not hasattr(self, "handle"):
            if _x.dtype == tensor.float16:
                # the two extra arguments are the cuDNN workspace byte limit
                # and the preferred algorithm; "tensor_ops" requests Tensor
                # Core kernels for half precision.
                self.handle = singa.CudnnConvHandle(
                    _x.data,
                    self.kernel_size,
                    self.stride,
                    self.padding,
                    self.in_channels,
                    self.nb_kernels,
                    self.bias,
                    self.group,
                    1024 * 1024 * 1024,
                    "tensor_ops",
                )
            else:
                self.handle = singa.CudnnConvHandle(
                    _x.data,
                    self.kernel_size,
                    self.stride,
                    self.padding,
                    self.in_channels,
                    self.nb_kernels,
                    self.bias,
                    self.group,
                )
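
# --- usage sketch (not part of layer.py) ---
# A minimal illustration, assuming SINGA's public Python API, of when
# initialize() runs: Conv2d defers parameter creation until the first call,
# so W, b and the conv handle are derived from the first input's shape,
# device and dtype. The names used below (layer.Conv2d, tensor.Tensor,
# device.get_default_device) are assumptions about the surrounding API,
# not part of this file.
from singa import device, layer, tensor

dev = device.get_default_device()              # host CPU device (id == -1)
conv = layer.Conv2d(8, kernel_size=3, padding=1, bias=True)

x = tensor.Tensor(shape=(4, 3, 32, 32), device=dev)
x.gaussian(0.0, 1.0)

# the first call runs initialize(x): in_channels is read from x.shape[1],
# W gets shape (8, 3, 3, 3) and a CPU ConvHandle is created and cached.
y = conv(x)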