source/neuropod/bindings/python_bindings.cc (142 lines of code) (raw):
/* Copyright (c) 2020 The Neuropod Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "neuropod/bindings/python_bindings.hh"
#include "neuropod/internal/error_utils.hh"
#include "neuropod/internal/neuropod_tensor.hh"
#include "neuropod/internal/neuropod_tensor_raw_data_access.hh"
#include "neuropod/neuropod.hh"
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
namespace neuropod
{
namespace py = pybind11;
namespace
{
TensorType get_array_type(py::array &array)
{
#define IS_INSTANCE_CHECK(cpp_type, neuropod_type) \
if (py::isinstance<py::array_t<cpp_type>>(array)) \
return neuropod_type;
FOR_EACH_TYPE_MAPPING_EXCEPT_STRING(IS_INSTANCE_CHECK)
// Strings need to be handled separately because `py::isinstance` does not do
// what we want in this case.
if (array.dtype().kind() == 'S' || array.dtype().kind() == 'U')
{
return STRING_TENSOR;
}
NEUROPOD_ERROR("Unsupported array type in python bindings: {}", array.dtype().kind());
#undef IS_INSTANCE_CHECK
}
pybind11::dtype get_py_type(const NeuropodTensor &tensor)
{
#define GET_TYPE(CPP_TYPE, NEUROPOD_TYPE) \
case NEUROPOD_TYPE: { \
return pybind11::dtype::of<CPP_TYPE>(); \
}
const auto &tensor_type = tensor.get_tensor_type();
switch (tensor_type)
{
FOR_EACH_TYPE_MAPPING_EXCEPT_STRING(GET_TYPE)
default:
NEUROPOD_ERROR("Unsupported array type in python bindings: {}", tensor_type);
}
#undef GET_TYPE
}
std::shared_ptr<NeuropodTensor> tensor_from_string_numpy(NeuropodTensorAllocator &allocator,
py::array & array,
std::vector<int64_t> & shape)
{
// Unfortunately, for strings, we need to copy all the data in the tensor
auto tensor = allocator.allocate_tensor<std::string>(shape);
int max_len = array.itemsize();
int numel = tensor->get_num_elements();
// Get a pointer to the underlying data
char *data = static_cast<char *>(array.mutable_data());
std::vector<std::string> out;
std::string chars_to_strip("\0", 1);
for (int i = 0; i < numel * max_len; i += max_len)
{
std::string item(data + i, max_len);
// Remove null padding at the end
item.erase(item.find_last_not_of(chars_to_strip) + 1);
out.emplace_back(item);
}
// This potentially does another copy (depending on the backend)
tensor->copy_from(out);
return tensor;
}
} // namespace
std::shared_ptr<NeuropodTensor> tensor_from_numpy(NeuropodTensorAllocator &allocator, py::array array)
{
// Make sure the array is contiguous and aligned
// NOLINTNEXTLINE(readability-implicit-bool-conversion, hicpp-signed-bitwise)
if (!(array.flags() & py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_) ||
!(array.flags() & py::detail::npy_api::constants::NPY_ARRAY_ALIGNED_))
{
SPDLOG_WARN("Expected numpy array to be contiguous and aligned; converting...");
array = py::array::ensure(array,
py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_ |
py::detail::npy_api::constants::NPY_ARRAY_ALIGNED_);
}
auto ndims = array.ndim();
auto dims = array.shape();
auto dtype = get_array_type(array);
auto data = array.mutable_data();
// Capture the array in our deleter so it doesn't get deallocated
// until we're done
auto to_delete = std::make_shared<py::array>(array);
auto deleter = [to_delete](void *unused) mutable {
py::gil_scoped_acquire gil;
to_delete.reset();
};
// Create a vector with the shape info
std::vector<int64_t> shape(&dims[0], &dims[ndims]);
// Handle string tensors
if (dtype == STRING_TENSOR)
{
return tensor_from_string_numpy(allocator, array, shape);
}
// Wrap the data from the numpy array
return allocator.tensor_from_memory(shape, dtype, data, deleter);
}
py::array tensor_to_numpy(std::shared_ptr<NeuropodTensor> value)
{
auto tensor = value->as_tensor();
// This isn't going to be null, but we do a null check to keep
// static analyzers happy
if (tensor == nullptr)
{
NEUROPOD_ERROR("Error converting value to tensor");
}
auto dims = tensor->get_dims();
// Handle string tensors
if (tensor->get_tensor_type() == STRING_TENSOR)
{
// Special case for empty string tensors because the pybind functions below don't correctly set the
// type of the resulting array in this case
if (tensor->get_num_elements() == 0)
{
return py::array_t<std::array<char, 1>>(dims);
}
// We need to return as bytes to python since we don't know what encoding this has
// Get the data as a string vector
auto data_vec = tensor->as_typed_tensor<std::string>()->get_data_as_vector();
// Maybe there's a better way of doing this
size_t max_item_size_bytes = 0;
for (const auto &item : data_vec)
{
max_item_size_bytes = std::max(item.size(), max_item_size_bytes);
}
// Set up an array with the right format
py::dtype dt(fmt::format("|S{}", max_item_size_bytes));
auto arr = py::array(dt, size_t{tensor->get_num_elements()});
// Zero the whole array
memset(arr.mutable_data(), 0, arr.nbytes());
// Copy in the data
// This is unfortunate, but because there isn't really a standard underlying string tensor format
// across frameworks, we need to make a copy
for (size_t i = 0; i < data_vec.size(); i++)
{
memcpy(arr.mutable_data(i), data_vec.at(i).data(), data_vec.at(i).size());
}
// Resize to target dims
arr.resize(dims);
return arr;
}
auto data = internal::NeuropodTensorRawDataAccess::get_untyped_data_ptr(*tensor);
// Make sure we don't deallocate the tensor until the numpy array is deallocated
auto deleter = [value](void *unused) {};
auto deleter_handle = register_deleter(deleter, nullptr);
auto capsule = py::capsule(deleter_handle, [](void *handle) { run_deleter(handle); });
return py::array(get_py_type(*tensor), dims, data, capsule);
}
NeuropodValueMap from_numpy_dict(NeuropodTensorAllocator &allocator, py::dict &items)
{
// Convert from a py::dict of numpy arrays to an unordered_map of `NeuropodTensor`s
NeuropodValueMap out;
for (auto item : items)
{
out[item.first.cast<std::string>()] = tensor_from_numpy(allocator, item.second.cast<py::array>());
}
return out;
}
py::dict to_numpy_dict(NeuropodValueMap &items)
{
// Convert the items to a python dict of numpy arrays
py::dict out;
for (auto &item : items)
{
out[item.first.c_str()] = tensor_to_numpy(std::dynamic_pointer_cast<NeuropodTensor>(item.second));
}
return out;
}
} // namespace neuropod