in runtime/native/python/treelite_runtime/predictor.py [0:0]
def from_npy2d(cls, mat, rbegin=0, rend=None, missing=None):
    """
    Get a dense batch from a 2D numpy matrix.

    If ``mat`` does not have ``order='C'`` (also known as row-major) or is not
    contiguous, a temporary copy will be made.
    If ``mat`` does not have ``dtype=numpy.float32``, a temporary copy will be
    made also.
    Thus, as many as two temporary copies of data can be made. One should set
    input layout and type judiciously to conserve memory.

    Parameters
    ----------
    mat : object of type :py:class:`numpy.ndarray`, with dimension 2
        data matrix
    rbegin : :py:class:`int <python:int>`, optional
        the index of the first row in the subset. If ``None``, set to 0.
    rend : :py:class:`int <python:int>`, optional
        one past the index of the last row in the subset. If missing, set to
        the end of the matrix.
    missing : :py:class:`float <python:float>`, optional
        value indicating missing value. If missing, set to ``numpy.nan``.

    Returns
    -------
    dense_batch : :py:class:`Batch`
        a dense batch consisting of rows ``[rbegin, rend)``

    Raises
    ------
    ValueError
        if ``mat`` is not a :py:class:`numpy.ndarray` or is not
        two-dimensional
    TreeliteError
        if ``[rbegin, rend)`` is empty or falls outside the rows of ``mat``
    """
    if not isinstance(mat, np.ndarray):
        raise ValueError('mat must be of type numpy.ndarray')
    if len(mat.shape) != 2:
        raise ValueError('Input numpy.ndarray must be two-dimensional')
    num_row, num_col = mat.shape
    rbegin = rbegin if rbegin is not None else 0
    rend = rend if rend is not None else num_row
    if rbegin >= rend:
        raise TreeliteError('rbegin must be less than rend')
    if rbegin < 0:
        raise TreeliteError('rbegin must be nonnegative')
    if rend > num_row:
        # rend is an exclusive bound, so rend == num_row is valid
        raise TreeliteError(
            'rend must be less than or equal to number of rows in mat')

    # Flatten the selected rows in row-major order and ensure float32.
    # reshape returns a view when possible, and ascontiguousarray copies only
    # when the dtype differs or the flattened view is not packed, so copies
    # are still avoided where they can be.
    # Unlike np.array(..., copy=False), this neither raises on NumPy >= 2.0
    # when a copy is unavoidable nor lets a strided 1-D view (e.g. a
    # single-column slice of a wider matrix) through; the native call below
    # only receives a raw pointer plus row/column counts, so the buffer is
    # presumably expected to be packed.
    num_selected = rend - rbegin
    data_subset = np.ascontiguousarray(
        mat[rbegin:rend, :].reshape(num_selected * num_col),
        dtype=np.float32)
    missing = missing if missing is not None else np.nan

    batch = Batch()
    batch.handle = ctypes.c_void_p()
    batch.kind = 'dense'
    _check_call(_LIB.TreeliteAssembleDenseBatch(
        data_subset.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_float(missing),
        ctypes.c_size_t(num_selected),
        ctypes.c_size_t(num_col),
        ctypes.byref(batch.handle)))
    # keep the flattened buffer alive: its pointer was handed to the native
    # call above
    batch.data = data_subset
    # save pointer to mat so that it doesn't get garbage-collected prematurely
    # (data_subset may be a view into it)
    batch.mat = mat
    return batch