py::array tensor_to_numpy()

in source/neuropod/bindings/python_bindings.cc [141:203]


// Converts a NeuropodTensor into a numpy array.
//
// String tensors are copied into a newly allocated fixed-width bytes array ("|S<N>", where N is
// the size in bytes of the longest element). Shorter elements are padded with trailing zero bytes.
//
// All other tensor types are wrapped without copying: the returned array points at the tensor's
// underlying buffer and holds a capsule that keeps `value` alive until the array is deallocated.
//
// `value` is the tensor to convert. An error is raised through NEUROPOD_ERROR if it is null or
// cannot be viewed as a tensor.
py::array tensor_to_numpy(std::shared_ptr<NeuropodTensor> value)
{
    // Guard against dereferencing an empty shared_ptr below
    if (value == nullptr)
    {
        NEUROPOD_ERROR("Error converting value to tensor");
    }

    auto tensor = value->as_tensor();

    // This isn't going to be null, but we do a null check to keep
    // static analyzers happy
    if (tensor == nullptr)
    {
        NEUROPOD_ERROR("Error converting value to tensor");
    }

    auto dims = tensor->get_dims();

    // Handle string tensors
    if (tensor->get_tensor_type() == STRING_TENSOR)
    {
        // Special case for empty string tensors because the pybind functions below don't correctly set the
        // type of the resulting array in this case
        if (tensor->get_num_elements() == 0)
        {
            return py::array_t<std::array<char, 1>>(dims);
        }

        // We need to return as bytes to python since we don't know what encoding this has

        // Get the data as a string vector
        const auto data_vec = tensor->as_typed_tensor<std::string>()->get_data_as_vector();

        // Find the widest element; every slot in the numpy array has this fixed width
        size_t max_item_size_bytes = 0;
        for (const auto &item : data_vec)
        {
            max_item_size_bytes = std::max(item.size(), max_item_size_bytes);
        }

        // Clamp to at least one byte. If every string is empty the format would be "|S0", a dtype with an
        // itemsize of zero. Strides derived from a zero itemsize would make all elements alias the same byte
        // (and the resize below reject the array as non-contiguous).
        // NOTE(review): this relies on how pybind11 computes strides and on numpy widening S0 arrays to S1 on
        // allocation - confirm against the pybind11/numpy versions in use.
        max_item_size_bytes = std::max<size_t>(max_item_size_bytes, 1);

        // Set up a flat (1-D) array with the right format
        py::dtype dt(fmt::format("|S{}", max_item_size_bytes));
        auto      arr = py::array(dt, size_t{tensor->get_num_elements()});

        // Zero the whole array so that items shorter than the slot width are zero padded
        memset(arr.mutable_data(), 0, arr.nbytes());

        // Copy in the data
        // This is unfortunate, but because there isn't really a standard underlying string tensor format
        // across frameworks, we need to make a copy
        for (size_t i = 0; i < data_vec.size(); i++)
        {
            const auto &item = data_vec[i];
            if (!item.empty())
            {
                memcpy(arr.mutable_data(i), item.data(), item.size());
            }
        }

        // Resize to target dims
        arr.resize(dims);
        return arr;
    }

    auto data = internal::NeuropodTensorRawDataAccess::get_untyped_data_ptr(*tensor);

    // Make sure we don't deallocate the tensor until the numpy array is deallocated
    // The lambda does nothing when invoked; capturing `value` by copy is what keeps the tensor alive
    auto deleter        = [value](void * /*unused*/) {};
    auto deleter_handle = register_deleter(deleter, nullptr);
    auto capsule        = py::capsule(deleter_handle, [](void *handle) { run_deleter(handle); });
    return py::array(get_py_type(*tensor), dims, data, capsule);
}