in tensorflow/tensorflow/core/kernels/example_parsing_ops.cc [433:828]
// Parses a single serialized SequenceExample and writes its context and
// feature-list features to the op's outputs.
//
// Inputs read from `ctx`: "debug_name", "serialized", the four key lists
// ("context_dense_keys", "context_sparse_keys", "feature_list_dense_keys",
// "feature_list_sparse_keys"), "context_dense_defaults" and
// "feature_list_dense_missing_assumed_empty".
//
// Outputs written: context_dense_values, context_sparse_{indices,values,
// shapes}, feature_list_dense_values and feature_list_sparse_{indices,values,
// shapes}.
//
// Validation failures are reported through OP_REQUIRES / OP_REQUIRES_OK, which
// record the error on `ctx` and return from this method early.
void Compute(OpKernelContext* ctx) override {
  const Tensor* debug_name;
  const Tensor* serialized;
  OpInputList context_dense_keys;
  OpInputList context_sparse_keys;
  OpInputList context_dense_defaults;
  OpInputList feature_list_dense_keys;
  OpInputList feature_list_sparse_keys;
  const Tensor* feature_list_dense_missing_assumed_empty;
  OP_REQUIRES_OK(ctx, ctx->input("debug_name", &debug_name));
  OP_REQUIRES_OK(ctx, ctx->input("serialized", &serialized));
  OP_REQUIRES_OK(ctx, ctx->input("feature_list_dense_missing_assumed_empty",
                                 &feature_list_dense_missing_assumed_empty));
  OP_REQUIRES_OK(ctx,
                 ctx->input_list("context_dense_keys", &context_dense_keys));
  OP_REQUIRES_OK(ctx, ctx->input_list("feature_list_dense_keys",
                                      &feature_list_dense_keys));
  OP_REQUIRES_OK(
      ctx, ctx->input_list("context_sparse_keys", &context_sparse_keys));
  OP_REQUIRES_OK(ctx, ctx->input_list("feature_list_sparse_keys",
                                      &feature_list_sparse_keys));
  OP_REQUIRES_OK(ctx, ctx->input_list("context_dense_defaults",
                                      &context_dense_defaults));

  std::vector<string> context_dense_keys_t(attrs_.num_context_dense);
  std::vector<string> context_sparse_keys_t(attrs_.num_context_sparse);
  std::vector<string> feature_list_dense_keys_t(
      attrs_.num_feature_list_dense);
  std::vector<string> feature_list_sparse_keys_t(
      attrs_.num_feature_list_sparse);
  std::unordered_set<string> feature_list_dense_missing_assumed_empty_set;

  // The loops below index each key list up to the corresponding attr count,
  // so the list lengths must agree with the attrs. Report a mismatch as an op
  // error instead of aborting the whole process with CHECK.
  OP_REQUIRES(ctx, context_dense_keys.size() == attrs_.num_context_dense,
              errors::InvalidArgument(
                  "Expected len(context_dense_keys) == ",
                  attrs_.num_context_dense,
                  " but got: ", context_dense_keys.size()));
  OP_REQUIRES(ctx, context_sparse_keys.size() == attrs_.num_context_sparse,
              errors::InvalidArgument(
                  "Expected len(context_sparse_keys) == ",
                  attrs_.num_context_sparse,
                  " but got: ", context_sparse_keys.size()));
  OP_REQUIRES(
      ctx, feature_list_dense_keys.size() == attrs_.num_feature_list_dense,
      errors::InvalidArgument("Expected len(feature_list_dense_keys) == ",
                              attrs_.num_feature_list_dense,
                              " but got: ", feature_list_dense_keys.size()));
  OP_REQUIRES(
      ctx, feature_list_sparse_keys.size() == attrs_.num_feature_list_sparse,
      errors::InvalidArgument("Expected len(feature_list_sparse_keys) == ",
                              attrs_.num_feature_list_sparse,
                              " but got: ", feature_list_sparse_keys.size()));

  // Every key must be a scalar string; copy each into a plain string vector.
  for (int di = 0; di < attrs_.num_context_dense; ++di) {
    OP_REQUIRES(ctx,
                TensorShapeUtils::IsScalar(context_dense_keys[di].shape()),
                errors::InvalidArgument(
                    "Expected context_dense_keys[", di,
                    "] to be a scalar, got shape: ",
                    context_dense_keys[di].shape().DebugString()));
    context_dense_keys_t[di] = context_dense_keys[di].scalar<tstring>()();
  }
  for (int di = 0; di < attrs_.num_context_sparse; ++di) {
    OP_REQUIRES(ctx,
                TensorShapeUtils::IsScalar(context_sparse_keys[di].shape()),
                errors::InvalidArgument(
                    "Expected context_sparse_keys[", di,
                    "] to be a scalar, got shape: ",
                    context_sparse_keys[di].shape().DebugString()));
    context_sparse_keys_t[di] = context_sparse_keys[di].scalar<tstring>()();
  }
  for (int di = 0; di < attrs_.num_feature_list_dense; ++di) {
    OP_REQUIRES(
        ctx, TensorShapeUtils::IsScalar(feature_list_dense_keys[di].shape()),
        errors::InvalidArgument(
            "Expected feature_list_dense_keys[", di,
            "] to be a scalar, got shape: ",
            feature_list_dense_keys[di].shape().DebugString()));
    feature_list_dense_keys_t[di] =
        feature_list_dense_keys[di].scalar<tstring>()();
  }
  for (int di = 0; di < attrs_.num_feature_list_sparse; ++di) {
    OP_REQUIRES(
        ctx, TensorShapeUtils::IsScalar(feature_list_sparse_keys[di].shape()),
        errors::InvalidArgument(
            "Expected feature_list_sparse_keys[", di,
            "] to be a scalar, got shape: ",
            feature_list_sparse_keys[di].shape().DebugString()));
    feature_list_sparse_keys_t[di] =
        feature_list_sparse_keys[di].scalar<tstring>()();
  }

  // Collect the names of dense feature lists that may be absent from the
  // proto; those are treated as empty lists further down.
  OP_REQUIRES(
      ctx,
      TensorShapeUtils::IsVector(
          feature_list_dense_missing_assumed_empty->shape()),
      errors::InvalidArgument(
          "Expected feature_list_dense_missing_assumed_empty ",
          "to be a vector, got shape: ",
          feature_list_dense_missing_assumed_empty->shape().DebugString()));
  auto feature_list_dense_missing_assumed_empty_t =
      feature_list_dense_missing_assumed_empty->vec<tstring>();
  for (int de = 0;
       de < feature_list_dense_missing_assumed_empty->NumElements(); ++de) {
    feature_list_dense_missing_assumed_empty_set.insert(
        feature_list_dense_missing_assumed_empty_t(de));
  }

  // An empty debug_name tensor means "no name supplied". In that case the
  // tensor is not a scalar, so it must not be accessed through scalar<>()
  // (which requires exactly one element); the value is only read below when
  // has_debug_name is true.
  const bool has_debug_name = (debug_name->NumElements() > 0);
  if (has_debug_name) {
    OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(debug_name->shape()),
                errors::InvalidArgument(
                    "Expected debug_name to be a scalar, got shape: ",
                    debug_name->shape().DebugString()));
  }
  OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(serialized->shape()),
              errors::InvalidArgument(
                  "Expected serialized to be a scalar, got shape: ",
                  serialized->shape().DebugString()));
  OP_REQUIRES(ctx, context_dense_defaults.size() == attrs_.num_context_dense,
              errors::InvalidArgument("Expected len(context_dense_defaults) "
                                      "== len(context_dense_keys) but got: ",
                                      context_dense_defaults.size(), " vs. ",
                                      attrs_.num_context_dense));

  // A context dense feature is required iff its default tensor is empty.
  // Non-empty defaults must match the declared shape and dtype.
  std::vector<bool> required(attrs_.num_context_dense);
  for (int d = 0; d < attrs_.num_context_dense; ++d) {
    const Tensor& def_value = context_dense_defaults[d];
    required[d] = (def_value.NumElements() == 0);  // No default provided.
    if (def_value.NumElements() > 0) {
      OP_REQUIRES(ctx, def_value.shape() == attrs_.context_dense_shapes[d],
                  errors::InvalidArgument(
                      "def_value[", d,
                      "].shape() == ", def_value.shape().DebugString(),
                      " != context_dense_shapes_[", d,
                      "] == ", attrs_.context_dense_shapes[d].DebugString()));
      OP_REQUIRES(
          ctx, def_value.dtype() == attrs_.context_dense_types[d],
          errors::InvalidArgument(
              "context_dense_defaults[", d, "].dtype() == ",
              DataTypeString(def_value.dtype()), " != context_dense_types_[",
              d, "] == ", DataTypeString(attrs_.context_dense_types[d])));
    }
  }

  auto serialized_t = serialized->scalar<tstring>();

  OpOutputList context_sparse_indices;
  OpOutputList context_sparse_values;
  OpOutputList context_sparse_shapes;
  OpOutputList context_dense_values;
  OpOutputList feature_list_sparse_indices;
  OpOutputList feature_list_sparse_values;
  OpOutputList feature_list_sparse_shapes;
  OpOutputList feature_list_dense_values;
  OP_REQUIRES_OK(ctx, ctx->output_list("context_sparse_indices",
                                       &context_sparse_indices));
  OP_REQUIRES_OK(
      ctx, ctx->output_list("context_sparse_values", &context_sparse_values));
  OP_REQUIRES_OK(
      ctx, ctx->output_list("context_sparse_shapes", &context_sparse_shapes));
  OP_REQUIRES_OK(
      ctx, ctx->output_list("context_dense_values", &context_dense_values));
  OP_REQUIRES_OK(ctx, ctx->output_list("feature_list_sparse_indices",
                                       &feature_list_sparse_indices));
  OP_REQUIRES_OK(ctx, ctx->output_list("feature_list_sparse_values",
                                       &feature_list_sparse_values));
  OP_REQUIRES_OK(ctx, ctx->output_list("feature_list_sparse_shapes",
                                       &feature_list_sparse_shapes));
  OP_REQUIRES_OK(ctx, ctx->output_list("feature_list_dense_values",
                                       &feature_list_dense_values));

#ifdef TENSORFLOW_LITE_PROTOS
  SequenceExample ex;
#else
  // Allocate the SequenceExample on an arena. Provides better memory locality
  // and greatly speeds up destruction.
  protobuf::ArenaOptions options;
  // We have some hint of what the final proto size will be based on the size
  // of the serialized bytes- use this to set a custom allocation strategy.
  // Note that the default allocation strategy is quite conservative (min
  // block size of 256 bytes, and a max of 8 kilobytes).
  const size_t block_size =
      static_cast<size_t>(serialized_t().size() * 1.1);
  options.start_block_size = std::max(options.start_block_size, block_size);
  options.max_block_size = std::max(options.max_block_size, block_size);
  protobuf::Arena arena(options);
  auto& ex = *protobuf::Arena::CreateMessage<SequenceExample>(&arena);
#endif
  OP_REQUIRES(
      ctx, ParseProtoUnlimited(&ex, serialized_t()),
      errors::InvalidArgument("Could not parse example input, value: '",
                              serialized_t(), "'"));

  // Name used in error messages. debug_name is only dereferenced as a scalar
  // when it is known to hold a value (see has_debug_name above).
  const string& name =
      has_debug_name ? debug_name->scalar<tstring>()() : "<unknown>";

  const Features& context = ex.context();
  const auto& context_dict = context.feature();

  // Context Dense -----------------------------------------------------------

  // Preallocate context_dense_values, since we know their sizes
  for (int d = 0; d < attrs_.num_context_dense; ++d) {
    TensorShape out_shape;
    // Use int64 so large dimension sizes are not truncated.
    for (const int64 dim : attrs_.context_dense_shapes[d].dim_sizes())
      out_shape.AddDim(dim);
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, context_dense_values.allocate(d, out_shape, &out));
  }

  for (int d = 0; d < attrs_.num_context_dense; ++d) {
    const string& key = context_dense_keys_t[d];
    const DataType& dtype = attrs_.context_dense_types[d];
    const TensorShape& shape = attrs_.context_dense_shapes[d];

    const auto& feature_found = context_dict.find(key);
    OP_REQUIRES(
        ctx, (feature_found != context_dict.end()) || !required[d],
        errors::InvalidArgument("Name: ", name, ", Context feature '", key,
                                "' is required but could not be found."));
    if (feature_found != context_dict.end()) {
      const Feature& f = feature_found->second;
      bool types_match;
      OP_REQUIRES_OK(ctx, CheckTypesMatch(f, dtype, &types_match));
      OP_REQUIRES(
          ctx, types_match,
          errors::InvalidArgument("Name: ", name, ", Context feature: ", key,
                                  ".  Data types don't match. ",
                                  "Expected type: ", DataTypeString(dtype),
                                  "  Feature is: ", ProtoDebugString(f)));

      OP_REQUIRES_OK(ctx, FeatureDenseCopy(0, name, key, dtype, shape, f,
                                           context_dense_values[d]));
    } else {
      // Feature absent but optional: fall back to the supplied default.
      RowDenseCopy(0, dtype, context_dense_defaults[d],
                   context_dense_values[d]);
    }
  }

  // Context Sparse ----------------------------------------------------------
  for (int d = 0; d < attrs_.num_context_sparse; ++d) {
    const string& key = context_sparse_keys_t[d];
    const DataType& dtype = attrs_.context_sparse_types[d];

    const auto& feature_found = context_dict.find(key);
    bool feature_has_data =  // Found key & data type is set
        (feature_found != context_dict.end() &&
         (feature_found->second.kind_case() != Feature::KIND_NOT_SET));

    if (feature_has_data) {
      const Feature& f = feature_found->second;
      bool types_match;
      OP_REQUIRES_OK(ctx, CheckTypesMatch(f, dtype, &types_match));
      OP_REQUIRES(
          ctx, types_match,
          errors::InvalidArgument("Name: ", name, ", Context feature: ", key,
                                  ".  Data types don't match. ",
                                  "Expected type: ", DataTypeString(dtype),
                                  "  Feature is: ", ProtoDebugString(f)));

      Tensor feature_values = FeatureSparseCopy(0, key, dtype, f);
      const int64 num_elements = feature_values.NumElements();
      TensorShape indices_shape({num_elements, 1});
      Tensor* sp_indices_d = nullptr;
      Tensor* sp_shape_d = nullptr;
      OP_REQUIRES_OK(ctx, context_sparse_indices.allocate(d, indices_shape,
                                                          &sp_indices_d));
      context_sparse_values.set(d, feature_values);
      OP_REQUIRES_OK(ctx, context_sparse_shapes.allocate(d, TensorShape({1}),
                                                         &sp_shape_d));
      auto shape_t = sp_shape_d->vec<int64>();
      shape_t(0) = num_elements;
      // Indices are simply 0..num_elements-1 in the single index column.
      auto indices_t = sp_indices_d->matrix<int64>();
      std::iota(indices_t.data(), indices_t.data() + num_elements, 0);
    } else {
      // Missing or unset feature: emit an empty sparse tensor.
      TensorShape indices_shape({0, 1});
      TensorShape values_shape({0});
      Tensor* sp_indices_d = nullptr;
      Tensor* sp_values_d = nullptr;
      Tensor* sp_shape_d = nullptr;
      OP_REQUIRES_OK(ctx, context_sparse_indices.allocate(d, indices_shape,
                                                          &sp_indices_d));
      OP_REQUIRES_OK(
          ctx, context_sparse_values.allocate(d, values_shape, &sp_values_d));
      OP_REQUIRES_OK(ctx, context_sparse_shapes.allocate(d, TensorShape({1}),
                                                         &sp_shape_d));
      auto shape_t = sp_shape_d->vec<int64>();
      shape_t(0) = 0;
    }
  }

  // Feature List Dense ------------------------------------------------------

  // Allocate each feature_list_dense_values output as it is visited; its
  // shape is [number of features in the list] + the declared dense shape.
  const FeatureLists& feature_lists = ex.feature_lists();
  const auto& feature_list_dict = feature_lists.feature_list();
  FeatureList empty_feature_list;  // Placeholder for missing FLs

  for (int d = 0; d < attrs_.num_feature_list_dense; ++d) {
    const string& key = feature_list_dense_keys_t[d];
    const DataType& dtype = attrs_.feature_list_dense_types[d];
    const TensorShape& shape = attrs_.feature_list_dense_shapes[d];

    const auto& feature_list_found = feature_list_dict.find(key);
    bool feature_list_missing =
        (feature_list_found == feature_list_dict.end());
    bool feature_list_allowed_missing =
        (feature_list_dense_missing_assumed_empty_set.count(key) > 0);

    OP_REQUIRES(
        ctx, !feature_list_missing || feature_list_allowed_missing,
        errors::InvalidArgument("Name: ", name, ", Feature list '", key,
                                "' is required but could not be found.  "
                                "Did you mean to include it in "
                                "feature_list_dense_missing_assumed_empty or "
                                "feature_list_dense_defaults?"));

    TensorShape out_shape;
    const FeatureList& fl = (feature_list_missing)
                                ? empty_feature_list
                                : feature_list_found->second;
    out_shape.AddDim(fl.feature_size());
    // Use int64 so large dimension sizes are not truncated.
    for (const int64 dim : attrs_.feature_list_dense_shapes[d].dim_sizes()) {
      out_shape.AddDim(dim);
    }
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx,
                   feature_list_dense_values.allocate(d, out_shape, &out));

    for (int64 t = 0; t < fl.feature_size(); ++t) {
      const Feature& f = fl.feature(t);
      bool types_match;
      OP_REQUIRES_OK(ctx, CheckTypesMatch(f, dtype, &types_match));
      OP_REQUIRES(ctx, types_match,
                  errors::InvalidArgument(
                      "Name: ", name, ", Feature list: ", key, ", Index: ", t,
                      ".  Data types don't match. ",
                      "Expected type: ", DataTypeString(dtype),
                      "  Feature is: ", ProtoDebugString(f)));
      OP_REQUIRES_OK(ctx, FeatureDenseCopy(t, name, key, dtype, shape, f,
                                           feature_list_dense_values[d]));
    }
  }

  // Feature List Sparse -----------------------------------------------------
  for (int d = 0; d < attrs_.num_feature_list_sparse; ++d) {
    const string& key = feature_list_sparse_keys_t[d];
    const DataType& dtype = attrs_.feature_list_sparse_types[d];

    const auto& feature_list_found = feature_list_dict.find(key);
    bool feature_list_has_data =  // Found key
        (feature_list_found != feature_list_dict.end());

    // One values tensor per time step; stays empty when the key is absent
    // (feature_list_size is then 0, so nothing below reads from it).
    std::vector<Tensor> sparse_values_tmp;
    int64 feature_list_size = 0;
    if (feature_list_has_data) {
      const FeatureList& fl = feature_list_found->second;
      feature_list_size = fl.feature_size();
      sparse_values_tmp.reserve(feature_list_size);
      for (int64 t = 0; t < feature_list_size; ++t) {
        const Feature& f = fl.feature(t);
        bool types_match;
        OP_REQUIRES_OK(ctx, CheckTypesMatch(f, dtype, &types_match));
        // A feature with no kind set is accepted here regardless of dtype.
        OP_REQUIRES(
            ctx, f.kind_case() == Feature::KIND_NOT_SET || types_match,
            errors::InvalidArgument("Name: ", name, ", Feature List: ", key,
                                    ", Index: ", t,
                                    ".  Data types don't match. ",
                                    "Expected type: ", DataTypeString(dtype),
                                    "  Feature is: ", ProtoDebugString(f)));
        sparse_values_tmp.push_back(FeatureSparseCopy(t, key, dtype, f));
      }
    }

    // Total element count sizes the indices/values outputs; the per-step
    // maximum becomes the second entry of the dense shape.
    int64 total_num_features = 0;
    int64 max_num_features = 0;
    for (int t = 0; t < feature_list_size; ++t) {
      const Tensor& v = sparse_values_tmp[t];
      const int64 num_elements = v.shape().num_elements();
      total_num_features += num_elements;
      max_num_features = std::max(max_num_features, num_elements);
    }

    TensorShape indices_shape({total_num_features, 2});
    TensorShape values_shape({total_num_features});
    Tensor* sp_indices_d = nullptr;
    Tensor* sp_values_d = nullptr;
    Tensor* sp_shape_d = nullptr;
    OP_REQUIRES_OK(ctx, feature_list_sparse_indices.allocate(d, indices_shape,
                                                             &sp_indices_d));
    OP_REQUIRES_OK(ctx, feature_list_sparse_values.allocate(d, values_shape,
                                                            &sp_values_d));
    OP_REQUIRES_OK(ctx, feature_list_sparse_shapes.allocate(
                            d, TensorShape({2}), &sp_shape_d));
    auto shape_t = sp_shape_d->vec<int64>();
    shape_t(0) = feature_list_size;
    shape_t(1) = max_num_features;

    // Append each time step's values (and their indices) at a running offset.
    int64 offset = 0;
    for (int t = 0; t < feature_list_size; ++t) {
      const int64 num_elements = CopyIntoSparseTensor(
          sparse_values_tmp[t], t, offset, sp_indices_d, sp_values_d);
      offset += num_elements;
    }
  }
}