in src/c_api/c_api.cc [97:142]
int TreeliteDMatrixCreateFromMat(const float* data,
size_t num_row,
size_t num_col,
float missing_value,
DMatrixHandle* out) {
const bool nan_missing = common::math::CheckNAN(missing_value);
API_BEGIN();
CHECK_LT(num_col, std::numeric_limits<uint32_t>::max())
<< "num_col argument is too big";
DMatrix* dmat = new DMatrix();
dmat->Clear();
auto& data_ = dmat->data;
auto& col_ind_ = dmat->col_ind;
auto& row_ptr_ = dmat->row_ptr;
// make an educated guess for initial sizes,
// so as to present initial wave of allocation
const size_t guess_size
= std::min(std::min(num_row * num_col, num_row * 1000),
static_cast<size_t>(64 * 1024 * 1024));
data_.reserve(guess_size);
col_ind_.reserve(guess_size);
row_ptr_.reserve(num_row + 1);
const float* row = &data[0]; // points to beginning of each row
for (size_t i = 0; i < num_row; ++i, row += num_col) {
for (size_t j = 0; j < num_col; ++j) {
if (common::math::CheckNAN(row[j])) {
CHECK(nan_missing)
<< "The missing_value argument must be set to NaN if there is any "
<< "NaN in the matrix.";
} else if (nan_missing || row[j] != missing_value) {
// row[j] is a valid entry
data_.push_back(row[j]);
col_ind_.push_back(static_cast<uint32_t>(j));
}
}
row_ptr_.push_back(data_.size());
}
data_.shrink_to_fit();
col_ind_.shrink_to_fit();
dmat->num_row = num_row;
dmat->num_col = num_col;
dmat->nelem = data_.size(); // some nonzeros may have been deleted as NaN
*out = static_cast<DMatrixHandle>(dmat);
API_END();
}