in ss_baselines/savi/models/visual_cnn.py [0:0]
def __init__(self, observation_space, output_size, extra_rgb=False):
    """Set up the convolutional encoder over the visual observations.

    Args:
        observation_space: Object exposing a ``spaces`` mapping. The
            "rgb", "depth" and "semantic" entries are used when present.
        output_size: Number of output features of the final linear layer.
        extra_rgb: When true, the "rgb" entry contributes no input
            channels to this encoder even if it is present.
    """
    super().__init__()
    self._output_size = output_size
    spaces = observation_space.spaces

    # Input channels contributed by each modality (0 means "not used").
    use_rgb = "rgb" in spaces and not extra_rgb
    self._n_input_rgb = spaces["rgb"].shape[2] if use_rgb else 0
    self._n_input_depth = (
        spaces["depth"].shape[2] if "depth" in spaces else 0
    )
    # A present semantic observation is always counted as 6 channels.
    self._n_input_semantic = 6 if "semantic" in spaces else 0

    # Kernel sizes and strides of the three convolution layers.
    self._cnn_layers_kernel_size = [(8, 8), (4, 4), (3, 3)]
    self._cnn_layers_stride = [(4, 4), (2, 2), (2, 2)]

    # The spatial size is taken from the first modality that contributes
    # channels, checked in the order rgb -> depth -> semantic.
    modality_channels = (
        ("rgb", self._n_input_rgb),
        ("depth", self._n_input_depth),
        ("semantic", self._n_input_semantic),
    )
    for key, n_channels in modality_channels:
        if n_channels > 0:
            spatial_dims = np.array(spaces[key].shape[:2], dtype=np.float32)
            break

    if not self.is_blind:
        total_channels = (
            self._n_input_rgb + self._n_input_depth + self._n_input_semantic
        )
        self._input_shape = (
            total_channels,
            int(spatial_dims[0]),
            int(spatial_dims[1]),
        )

        # Push the spatial size through every conv layer (no padding,
        # unit dilation) to learn the size of the flattened feature map.
        for kernel, step in zip(
            self._cnn_layers_kernel_size, self._cnn_layers_stride
        ):
            spatial_dims = self._conv_output_dim(
                dimension=spatial_dims,
                padding=np.array([0, 0], dtype=np.float32),
                dilation=np.array([1, 1], dtype=np.float32),
                kernel_size=np.array(kernel, dtype=np.float32),
                stride=np.array(step, dtype=np.float32),
            )

        # Channel plan: input -> 32 -> 64 -> 64. A ReLU sits between
        # consecutive convolutions; none follows the last convolution.
        channel_plan = (total_channels, 32, 64, 64)
        layers = []
        for idx, (kernel, step) in enumerate(
            zip(self._cnn_layers_kernel_size, self._cnn_layers_stride)
        ):
            if idx > 0:
                layers.append(nn.ReLU(True))
            layers.append(
                nn.Conv2d(
                    in_channels=channel_plan[idx],
                    out_channels=channel_plan[idx + 1],
                    kernel_size=kernel,
                    stride=step,
                )
            )

        # Flatten the feature map and project it to ``output_size``.
        layers.append(Flatten())
        layers.append(
            nn.Linear(64 * spatial_dims[0] * spatial_dims[1], output_size)
        )
        layers.append(nn.ReLU(True))
        self.cnn = nn.Sequential(*layers)
    else:
        # No usable visual input: keep an empty module as placeholder.
        self.cnn = nn.Sequential()

    self.layer_init()