# mujoco_py/mjbatchrenderer.pyx

try:
    import pycuda.driver as drv
except ImportError:
    drv = None


class MjBatchRendererException(Exception):
    pass


class MjBatchRendererNotSupported(MjBatchRendererException):
    pass


class CudaNotEnabledError(MjBatchRendererException):
    pass


class CudaBufferNotMappedError(MjBatchRendererException):
    pass


class CudaBufferMappedError(MjBatchRendererException):
    pass


class MjBatchRenderer(object):
    """
    Utility class for rendering into OpenGL Pixel Buffer Objects (PBOs),
    which allows for accessing multiple rendered images in batch.

    If used with CUDA (i.e. initialized with use_cuda=True), you need to
    call map/unmap when accessing the CUDA buffer pointer. This ensures
    that all OpenGL instructions have completed:

        renderer = MjBatchRenderer(100, 100, use_cuda=True)
        renderer.render(sim)
        renderer.map()
        image = renderer.read()
        renderer.unmap()
    """

    def __init__(self, width, height, batch_size=1, device_id=0,
                 depth=False, use_cuda=False):
        """
        Args:
        - width (int): Image width.
        - height (int): Image height.
        - batch_size (int): Size of batch to render into. Memory is
            allocated once upon initialization of the object.
        - device_id (int): Device to use for storing the batch.
        - depth (bool): if True, render depth in addition to RGB.
        - use_cuda (bool): if True, use OpenGL-CUDA interop to map
            the PBO onto a CUDA buffer.
        """
        # Early initialization to prevent failure in __del__
        self._use_cuda = False
        self.pbo_rgb, self.pbo_depth = 0, 0

        if not usingEGL():
            raise MjBatchRendererNotSupported(
                "MjBatchRenderer is currently only supported with an "
                "EGL-backed rendering context.")

        # Make sure OpenGL context is available before creating PBOs
        initOpenGL(device_id)
        makeOpenGLContextCurrent(device_id)

        self.pbo_rgb = createPBO(width, height, batch_size, 0)
        self.pbo_depth = createPBO(width, height, batch_size, 1) if depth else 0

        self._depth = depth
        self._device_id = device_id
        self._width = width
        self._height = height
        self._batch_size = batch_size
        self._current_batch_offset = 0

        self._use_cuda = use_cuda
        self._cuda_buffers_are_mapped = False
        self._cuda_rgb_ptr, self._cuda_depth_ptr = None, None
        if use_cuda:
            self._init_cuda()

    def _init_cuda(self):
        if drv is None:
            raise ImportError("Failed to import pycuda.")
        # Use local imports so that we don't have to make pycuda
        # OpenGL interop a requirement
        from pycuda.gl import RegisteredBuffer

        drv.init()
        device = drv.Device(self._device_id)
        self._cuda_context = device.make_context()

        self._cuda_context.push()
        self._cuda_rgb_pbo = RegisteredBuffer(self.pbo_rgb)
        if self._depth:
            self._cuda_depth_pbo = RegisteredBuffer(self.pbo_depth)

    def map(self):
        """ Map OpenGL buffer to CUDA for reading. """
        if not self._use_cuda:
            raise CudaNotEnabledError()
        elif self._cuda_buffers_are_mapped:
            return  # just make it a no-op

        self._cuda_context.push()
        self._cuda_rgb_mapping = self._cuda_rgb_pbo.map()
        ptr, self._cuda_rgb_buf_size = (
            self._cuda_rgb_mapping.device_ptr_and_size())
        assert ptr is not None and self._cuda_rgb_buf_size > 0
        if self._cuda_rgb_ptr is None:
            self._cuda_rgb_ptr = ptr
        # There doesn't seem to be a guarantee from the API that the
        # pointer will be the same between mappings, but empirically
        # this has been true. If this isn't true, we need to modify
        # the interface to MjBatchRenderer to make this clearer to the user.
        # So, hopefully we won't hit this assert.
        assert self._cuda_rgb_ptr == ptr, (
            "Mapped CUDA rgb buffer pointer %d doesn't match old pointer %d" %
            (ptr, self._cuda_rgb_ptr))

        if self._depth:
            self._cuda_depth_mapping = self._cuda_depth_pbo.map()
            ptr, self._cuda_depth_buf_size = (
                self._cuda_depth_mapping.device_ptr_and_size())
            assert ptr is not None and self._cuda_depth_buf_size > 0
            if self._cuda_depth_ptr is None:
                self._cuda_depth_ptr = ptr
            assert self._cuda_depth_ptr == ptr, (
                "Mapped CUDA depth buffer pointer %d doesn't match old pointer %d" %
                (ptr, self._cuda_depth_ptr))

        self._cuda_buffers_are_mapped = True

    def unmap(self):
        """ Unmap OpenGL buffer from CUDA so that it can be rendered into. """
        if not self._use_cuda:
            raise CudaNotEnabledError()
        elif not self._cuda_buffers_are_mapped:
            return  # just make it a no-op

        self._cuda_context.push()
        self._cuda_rgb_mapping.unmap()
        self._cuda_rgb_mapping = None
        self._cuda_rgb_ptr = None

        if self._depth:
            self._cuda_depth_mapping.unmap()
            self._cuda_depth_mapping = None
            self._cuda_depth_ptr = None

        self._cuda_buffers_are_mapped = False

    def prepare_render_context(self, sim):
        """
        Set up the rendering context for an MjSim. Also happens automatically
        on `.render()`.
        """
        for c in sim.render_contexts:
            if (c.offscreen and
                    isinstance(c.opengl_context, OffscreenOpenGLContext) and
                    c.opengl_context.device_id == self._device_id):
                return c

        return MjRenderContext(sim, device_id=self._device_id)

    def render(self, sim, camera_id=None, batch_offset=None):
        """
        Render the current scene from the MjSim into the buffer.

        By default the batch offset is automatically incremented with each
        call. It can be reset with the batch_offset parameter.

        This method doesn't return anything. Use the `.read` method to read
        the buffer, or access the buffer pointer directly with e.g. the
        `.cuda_rgb_buffer_pointer` accessor.

        Args:
        - sim (MjSim): The simulator to use for rendering.
        - camera_id (int): MuJoCo id for the camera, from
            `sim.model.camera_name2id()`.
        - batch_offset (int): offset in batch to render to.
        """
        if self._use_cuda and self._cuda_buffers_are_mapped:
            raise CudaBufferMappedError(
                "CUDA buffers must be unmapped before calling render.")

        if batch_offset is not None:
            if batch_offset < 0 or batch_offset >= self._batch_size:
                raise ValueError("batch_offset out of range")
            self._current_batch_offset = batch_offset

        # Ensure the correct device context is used (this takes ~1 µs)
        makeOpenGLContextCurrent(self._device_id)

        render_context = self.prepare_render_context(sim)
        render_context.update_offscreen_size(self._width, self._height)
        render_context.render(self._width, self._height, camera_id=camera_id)

        cdef mjrRect viewport
        viewport.left = 0
        viewport.bottom = 0
        viewport.width = self._width
        viewport.height = self._height

        cdef PyMjrContext con = <PyMjrContext> render_context.con
        copyFBOToPBO(con.ptr, self.pbo_rgb, self.pbo_depth,
                     viewport, self._current_batch_offset)

        self._current_batch_offset = (
            (self._current_batch_offset + 1) % self._batch_size)

    def read(self):
        """
        Transfer a copy of the buffer from the GPU to the CPU as a numpy array.
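
        In CUDA mode (use_cuda=True), the buffers must be mapped with
        `.map()` before calling this method; otherwise a
        CudaBufferNotMappedError is raised.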

        Returns:
        - rgb_batch (numpy array): batch of rgb images in uint8 NHWC format
        - depth_batch (numpy array): batch of depth images in uint16 NHWC format
        """
        if self._use_cuda:
            return self._read_cuda()
        else:
            return self._read_nocuda()

    def _read_cuda(self):
        if not self._cuda_buffers_are_mapped:
            raise CudaBufferNotMappedError(
                "CUDA buffers must be mapped before reading")

        rgb_arr = drv.from_device(
            self._cuda_rgb_ptr,
            shape=(self._batch_size, self._height, self._width, 3),
            dtype=np.uint8)

        if self._depth:
            depth_arr = drv.from_device(
                self._cuda_depth_ptr,
                shape=(self._batch_size, self._height, self._width),
                dtype=np.uint16)
        else:
            depth_arr = None

        return rgb_arr, depth_arr

    def _read_nocuda(self):
        rgb_arr = np.zeros(3 * self._width * self._height * self._batch_size,
                           dtype=np.uint8)
        cdef unsigned char[::view.contiguous] rgb_view = rgb_arr

        depth_arr = np.zeros(self._width * self._height * self._batch_size,
                             dtype=np.uint16)
        cdef unsigned short[::view.contiguous] depth_view = depth_arr

        if self._depth:
            readPBO(&rgb_view[0], &depth_view[0],
                    self.pbo_rgb, self.pbo_depth,
                    self._width, self._height, self._batch_size)
            depth_arr = depth_arr.reshape(
                self._batch_size, self._height, self._width)
        else:
            readPBO(&rgb_view[0], NULL,
                    self.pbo_rgb, 0,
                    self._width, self._height, self._batch_size)
            # Fine to throw away the depth_arr allocated above since
            # malloc/free is cheap
            depth_arr = None

        rgb_arr = rgb_arr.reshape(self._batch_size, self._height, self._width, 3)
        return rgb_arr, depth_arr

    @property
    def cuda_rgb_buffer_pointer(self):
        """ Pointer to CUDA buffer for RGB batch. """
        if not self._use_cuda:
            raise CudaNotEnabledError()
        elif not self._cuda_buffers_are_mapped:
            raise CudaBufferNotMappedError()
        return self._cuda_rgb_ptr

    @property
    def cuda_depth_buffer_pointer(self):
        """ Pointer to CUDA buffer for depth batch. """
        if not self._use_cuda:
            raise CudaNotEnabledError()
        elif not self._cuda_buffers_are_mapped:
            raise CudaBufferNotMappedError()
        if not self._depth:
            raise RuntimeError(
                "Depth not enabled. Use depth=True on initialization.")
        return self._cuda_depth_ptr

    def __del__(self):
        if self._use_cuda:
            self._cuda_context.push()
            self.unmap()
            self._cuda_rgb_pbo.unregister()
            if self._depth:
                self._cuda_depth_pbo.unregister()

            # Clean up context
            drv.Context.pop()
            self._cuda_context.detach()

        if self.pbo_rgb:
            freePBO(self.pbo_rgb)
        if self.pbo_depth:
            freePBO(self.pbo_depth)
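

# Example usage (a minimal sketch, not part of the module itself): render a
# small batch of frames and read them back on the CPU without CUDA interop.
# The model path is hypothetical; `load_model_from_path` and `MjSim` are the
# standard mujoco_py entry points, and this assumes MjBatchRenderer is
# importable from the compiled mujoco_py package on an EGL-backed setup.
#
#     from mujoco_py import load_model_from_path, MjSim, MjBatchRenderer
#
#     model = load_model_from_path("model.xml")  # hypothetical model file
#     sim = MjSim(model)
#     renderer = MjBatchRenderer(255, 255, batch_size=3, depth=True)
#     for _ in range(3):
#         sim.step()
#         renderer.render(sim)   # fills consecutive slots in the batch
#     rgb_batch, depth_batch = renderer.read()
#     # rgb_batch.shape == (3, 255, 255, 3), depth_batch.shape == (3, 255, 255)
#
# With use_cuda=True, wrap the read in `renderer.map()` / `renderer.unmap()`
# as shown in the class docstring.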