in src/mlio/recordio_protobuf_reader.cc [626:713]
// Validates `tensor` against the attribute currently being decoded (`attr_`)
// and, if it matches, hands it to the sparse or dense decoding routine.
//
// Three checks are performed in order: data type, sparsity, and shape. When a
// check fails, a diagnostic is built only if it is going to be used, i.e. if
// `state_->warn_bad_instance` or `state_->error_bad_example` is set. The
// message is logged as a warning when `warn_bad_instance` is set, and an
// Invalid_instance_error carrying the same message is thrown when
// `error_bad_example` is set.
//
// Returns false if a check fails and no exception is thrown; otherwise
// returns the result of append_to_builder<dt>() for sparse attributes or
// copy_to_tensor<dt>() for dense ones.
//
// NOTE(review): `dt` is not declared in this excerpt; presumably it is a
// template parameter of this function holding the data type of `tensor`.
bool Recordio_protobuf_reader::Decoder::decode_feature(const Protobuf_tensor &tensor)
{
    // 1. The data type carried by the tensor must match the attribute's.
    if (attr_->data_type() != dt) {
        if (state_->warn_bad_instance || state_->error_bad_example) {
            auto msg = fmt::format(
                "The feature '{2}' of the instance #{1:n} in the data store '{0}' has the data type {3} while it is expected to have the data type {4}.",
                instance_->data_store().id(),
                instance_->index(),
                attr_->name(),
                dt,
                attr_->data_type());
            if (state_->warn_bad_instance) {
                logger::warn(msg);
            }
            if (state_->error_bad_example) {
                throw Invalid_instance_error{msg};
            }
        }
        return false;
    }

    // 2. The tensor's sparsity must match the attribute's.
    if (is_sparse(tensor) != attr_->sparse()) {
        if (state_->warn_bad_instance || state_->error_bad_example) {
            const char *ft{};
            // We only get here when the two disagree, so `attr_->sparse()`
            // being true means the attribute is expected to be sparse and the
            // tensor we received is dense (and vice versa).
            if (attr_->sparse()) {
                ft =
                    "The feature '{2}' of the instance #{1:n} in the data store '{0}' is dense while it is expected to be sparse.";
            }
            else {
                ft =
                    "The feature '{2}' of the instance #{1:n} in the data store '{0}' is sparse while it is expected to be dense.";
            }
            auto msg =
                fmt::format(ft, instance_->data_store().id(), instance_->index(), attr_->name());
            if (state_->warn_bad_instance) {
                logger::warn(msg);
            }
            if (state_->error_bad_example) {
                throw Invalid_instance_error{msg};
            }
        }
        return false;
    }

    // 3. The tensor's shape must match the attribute's.
    if (!shape_equals(tensor)) {
        if (state_->warn_bad_instance || state_->error_bad_example) {
            // A tensor without an explicit shape is described by its number
            // of values instead.
            std::string shape_str;
            if (tensor.shape().empty()) {
                shape_str = fmt::to_string(tensor.values_size());
            }
            else {
                shape_str = fmt::format("{0}", fmt::join(tensor.shape(), ", "));
            }
            const Size_vector &shape = attr_->shape();
            // The first dimension of the expected shape is left out of the
            // message (presumably the batch dimension -- TODO confirm).
            auto msg = fmt::format(
                "The feature '{2}' of the instance #{1:n} in the data store '{0}' has the shape ({3}) while it is expected to have the shape ({4}).",
                instance_->data_store().id(),
                instance_->index(),
                attr_->name(),
                shape_str,
                fmt::join(shape.begin() + 1, shape.end(), ", "));
            if (state_->warn_bad_instance) {
                logger::warn(msg);
            }
            if (state_->error_bad_example) {
                throw Invalid_instance_error{msg};
            }
        }
        return false;
    }

    // All checks passed: decode according to the attribute's sparsity.
    if (attr_->sparse()) {
        return append_to_builder<dt>(tensor);
    }
    return copy_to_tensor<dt>(tensor);
}