in tfx_bsl/cc/coders/example_numpy_decoder.cc [33:108]
// Decodes a serialized Example proto into a newly created Python dict that
// maps each feature name to its values.
//
// The dict value stored for a feature depends on which value list is set:
//   - bytes_list: 1-D object ndarray holding one Python bytes object per value.
//   - float_list: 1-D float32 ndarray.
//   - int64_list: 1-D int64 ndarray.
//   - no list set: None (the feature is treated as a missing value).
//
// Args:
//   serialized_proto: the serialized Example bytes.
//   result: on success, receives a new reference to the dict (owned by the
//     caller). On failure, `*result` is left untouched and every Python object
//     created by this function is released.
//
// Returns:
//   - OkStatus on success.
//   - InternalError if numpy cannot be imported, or if a Python object cannot
//     be created or inserted into the dict.
//   - DataLossError if the proto cannot be parsed or a feature holds an
//     unsupported value list.
//
// NOTE(review): this calls the CPython / numpy C APIs, so presumably the caller
// must hold the GIL -- confirm against call sites.
absl::Status ExampleToNumpyDict(absl::string_view serialized_proto,
                                PyObject** result) {
  // Import numpy. (This is actually a macro, and "ret" is the return value
  // if import fails.)
  import_array1(/*ret=*/absl::InternalError("Unable to import numpy."));

  // Owns one Python reference and drops it on scope exit, so that every early
  // return below releases what has been created so far.
  struct ScopedPyRef {
    explicit ScopedPyRef(PyObject* object) : ptr(object) {}
    ~ScopedPyRef() { Py_XDECREF(ptr); }
    ScopedPyRef(const ScopedPyRef&) = delete;
    ScopedPyRef& operator=(const ScopedPyRef&) = delete;
    // Hands the reference over to the caller without decrementing it.
    PyObject* release() {
      PyObject* released = ptr;
      ptr = nullptr;
      return released;
    }
    PyObject* ptr;
  };

  Example example;
  if (!example.ParseFromArray(serialized_proto.data(),
                              serialized_proto.size())) {
    return absl::DataLossError("Failed to parse input proto.");
  }

  // Build the dict locally; it is only published through `result` once every
  // feature has been converted successfully.
  ScopedPyRef dict(PyDict_New());
  if (dict.ptr == nullptr) {
    return absl::InternalError("Failed to create Python dict.");
  }

  // Iterate over the features and add each one to the dict.
  for (const auto& p : example.features().feature()) {
    const std::string& feature_name = p.first;
    const Feature& feature = p.second;
    ScopedPyRef feature_values(nullptr);
    switch (feature.kind_case()) {
      case Feature::kBytesList: {
        const auto& values = feature.bytes_list().value();
        npy_intp values_dims[] = {static_cast<npy_intp>(values.size())};
        feature_values.ptr = PyArray_SimpleNew(1, values_dims, PyArray_OBJECT);
        if (feature_values.ptr == nullptr) {
          return absl::InternalError("Failed to create ndarray.");
        }
        PyObject** buffer =
            reinterpret_cast<PyObject**>(PyArray_DATA(feature_values.ptr));
        for (int i = 0; i < values.size(); ++i) {
          const std::string& v = values[i];
          // The array slot takes ownership of the new bytes object.
          buffer[i] = PyBytes_FromStringAndSize(v.data(), v.size());
          if (buffer[i] == nullptr) {
            return absl::InternalError("Failed to create bytes object.");
          }
        }
        break;
      }
      case Feature::kFloatList: {
        const auto& values = feature.float_list().value();
        npy_intp values_dims[] = {static_cast<npy_intp>(values.size())};
        feature_values.ptr = PyArray_SimpleNew(1, values_dims, PyArray_FLOAT32);
        if (feature_values.ptr == nullptr) {
          return absl::InternalError("Failed to create ndarray.");
        }
        // Skip the copy for empty lists: memcpy must not be handed a null
        // pointer, even when the byte count is zero.
        if (values.size() > 0) {
          memcpy(reinterpret_cast<void*>(PyArray_DATA(feature_values.ptr)),
                 values.data(), values.size() * sizeof(float));
        }
        break;
      }
      case Feature::kInt64List: {
        const auto& values = feature.int64_list().value();
        npy_intp values_dims[] = {static_cast<npy_intp>(values.size())};
        feature_values.ptr = PyArray_SimpleNew(1, values_dims, PyArray_INT64);
        if (feature_values.ptr == nullptr) {
          return absl::InternalError("Failed to create ndarray.");
        }
        // Same empty-list guard as for float lists.
        if (values.size() > 0) {
          memcpy(reinterpret_cast<void*>(PyArray_DATA(feature_values.ptr)),
                 values.data(), values.size() * sizeof(int64_t));
        }
        break;
      }
      case Feature::KIND_NOT_SET: {
        // If we have a feature with no value list, we consider it to be a
        // missing value.
        Py_INCREF(Py_None);
        feature_values.ptr = Py_None;
        break;
      }
      default: {
        return absl::DataLossError("Invalid value list in input proto.");
      }
    }
    // PyDict_SetItemString does not steal the value reference; our own
    // reference is dropped when `feature_values` goes out of scope.
    const int err = PyDict_SetItemString(dict.ptr, feature_name.data(),
                                         feature_values.ptr);
    if (err == -1) {
      return absl::InternalError("Failed to insert item into Dict.");
    }
  }

  *result = dict.release();
  return absl::OkStatus();
}