def from_npy2d()

in runtime/native/python/treelite_runtime/predictor.py [0:0]


  def from_npy2d(cls, mat, rbegin=0, rend=None, missing=None):
    """
    Get a dense batch from a 2D numpy matrix.
    If the selected rows of ``mat`` are not laid out with ``order='C'`` (also
    known as row-major), are not contiguous, or do not have
    ``dtype=numpy.float32``, a single temporary copy is made that fixes both
    layout and type at once. Otherwise the batch refers directly to the memory
    of ``mat`` and no copy is made.
    One should set input layout and type judiciously to conserve memory.

    Parameters
    ----------
    mat : object of type :py:class:`numpy.ndarray`, with dimension 2
        data matrix
    rbegin : :py:class:`int <python:int>`, optional
        the index of the first row in the subset. If ``None``, set to 0.
    rend : :py:class:`int <python:int>`, optional
        one past the index of the last row in the subset. If missing, set to
        the end of the matrix.
    missing : :py:class:`float <python:float>`, optional
        value indicating missing value. If missing, set to ``numpy.nan``.

    Returns
    -------
    dense_batch : :py:class:`Batch`
        a dense batch consisting of rows ``[rbegin, rend)``

    Raises
    ------
    ValueError
        if ``mat`` is not a :py:class:`numpy.ndarray` or is not two-dimensional
    TreeliteError
        if ``[rbegin, rend)`` is empty or does not lie within the rows of
        ``mat``
    """
    if not isinstance(mat, np.ndarray):
      raise ValueError('mat must be of type numpy.ndarray')
    if mat.ndim != 2:
      raise ValueError('Input numpy.ndarray must be two-dimensional')
    num_row, num_col = mat.shape
    rbegin = rbegin if rbegin is not None else 0
    rend = rend if rend is not None else num_row
    if rbegin >= rend:
      raise TreeliteError('rbegin must be less than rend')
    if rbegin < 0:
      raise TreeliteError('rbegin must be nonnegative')
    if rend > num_row:
      raise TreeliteError(
          'rend must be less than or equal to number of rows in mat')
    # The native library reads the buffer through a raw float pointer, so the
    # data must be C-contiguous float32. np.ascontiguousarray copies only when
    # the layout or dtype requires it. (np.array(..., copy=False) is not used:
    # under NumPy >= 2.0 it raises when a copy is needed, and it never
    # repaired a strided view produced by reshape.)
    # reshape(-1) on a C-contiguous array is always a view, so flattening by
    # rows adds no further copy.
    data_subset = np.ascontiguousarray(mat[rbegin:rend, :],
                                       dtype=np.float32).reshape(-1)
    missing = missing if missing is not None else np.nan

    batch = Batch()
    batch.handle = ctypes.c_void_p()
    batch.kind = 'dense'
    _check_call(_LIB.TreeliteAssembleDenseBatch(
        data_subset.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_float(missing),
        ctypes.c_size_t(rend - rbegin),
        ctypes.c_size_t(num_col),
        ctypes.byref(batch.handle)))
    # save handles for internal arrays so the buffer handed to the native
    # library stays alive as long as the batch does
    batch.data = data_subset
    # save pointer to mat so that it doesn't get garbage-collected prematurely
    # (data_subset may be a view into its memory)
    batch.mat = mat
    return batch