in tensorflow_data_validation/anomalies/feature_statistics_validator.cc [174:266]
// Validates feature statistics against a schema, with every structured input
// and the output passed as a serialized proto string.
//
// Inputs:
//   feature_statistics_proto_string: serialized DatasetFeatureStatistics
//     (always parsed).
//   schema_proto_string: serialized Schema (always parsed).
//   environment: forwarded as an absent optional when empty.
//   previous_span_statistics_proto_string,
//   serving_statistics_proto_string,
//   previous_version_statistics_proto_string: serialized
//     DatasetFeatureStatistics; an empty string means "not provided" and is
//     forwarded as an absent optional.
//   features_needed_string: serialized FeaturesNeededProto; forwarded as an
//     absent optional when the string is empty or when the converted
//     FeaturesNeeded is empty.
//   validation_config_string: serialized ValidationConfig; parsed
//     unconditionally, unlike the other optional inputs.
//   enable_diff_regions: forwarded unchanged to ValidateFeatureStatistics.
//
// Output:
//   anomalies_proto_string: receives the serialized Anomalies proto.
//
// Returns InvalidArgument when any input fails to parse, Internal when the
// Anomalies proto cannot be serialized, and otherwise propagates any error
// from FromFeaturesNeededProto or ValidateFeatureStatistics.
tensorflow::Status ValidateFeatureStatisticsWithSerializedInputs(
    const string& feature_statistics_proto_string,
    const string& schema_proto_string, const string& environment,
    const string& previous_span_statistics_proto_string,
    const string& serving_statistics_proto_string,
    const string& previous_version_statistics_proto_string,
    const string& features_needed_string,
    const string& validation_config_string, const bool enable_diff_regions,
    string* anomalies_proto_string) {
  // Parses `serialized` directly into the optional's storage so the parsed
  // proto is never copied. An empty string leaves `stats` disengaged and
  // counts as success. Returns false only when parsing fails.
  const auto parse_optional_statistics =
      [](const string& serialized,
         absl::optional<tensorflow::metadata::v0::DatasetFeatureStatistics>*
             stats) -> bool {
    if (serialized.empty()) {
      return true;
    }
    stats->emplace();
    return (*stats)->ParseFromString(serialized);
  };

  tensorflow::metadata::v0::Schema schema;
  if (!schema.ParseFromString(schema_proto_string)) {
    return tensorflow::errors::InvalidArgument("Failed to parse Schema proto.");
  }

  tensorflow::metadata::v0::DatasetFeatureStatistics feature_statistics;
  if (!feature_statistics.ParseFromString(feature_statistics_proto_string)) {
    return tensorflow::errors::InvalidArgument(
        "Failed to parse DatasetFeatureStatistics proto.");
  }

  // A default-constructed optional is empty; it only becomes engaged when the
  // corresponding serialized input is non-empty.
  absl::optional<tensorflow::metadata::v0::DatasetFeatureStatistics>
      previous_span_statistics;
  if (!parse_optional_statistics(previous_span_statistics_proto_string,
                                 &previous_span_statistics)) {
    return tensorflow::errors::InvalidArgument(
        "Failed to parse DatasetFeatureStatistics proto.");
  }

  absl::optional<tensorflow::metadata::v0::DatasetFeatureStatistics>
      serving_statistics;
  if (!parse_optional_statistics(serving_statistics_proto_string,
                                 &serving_statistics)) {
    return tensorflow::errors::InvalidArgument(
        "Failed to parse DatasetFeatureStatistics proto.");
  }

  absl::optional<tensorflow::metadata::v0::DatasetFeatureStatistics>
      previous_version_statistics;
  if (!parse_optional_statistics(previous_version_statistics_proto_string,
                                 &previous_version_statistics)) {
    return tensorflow::errors::InvalidArgument(
        "Failed to parse DatasetFeatureStatistics proto.");
  }

  absl::optional<string> may_be_environment;
  if (!environment.empty()) {
    may_be_environment = environment;
  }

  absl::optional<FeaturesNeeded> features_needed;
  if (!features_needed_string.empty()) {
    FeaturesNeededProto parsed_proto;
    if (!parsed_proto.ParseFromString(features_needed_string)) {
      return tensorflow::errors::InvalidArgument(
          "Failed to parse FeaturesNeeded");
    }
    // Build the FeaturesNeeded in place to avoid copying it afterwards.
    features_needed.emplace();
    TF_RETURN_IF_ERROR(
        FromFeaturesNeededProto(parsed_proto, &*features_needed));
    // An empty FeaturesNeeded is forwarded as "not provided".
    if (features_needed->empty()) {
      features_needed.reset();
    }
  }

  data_validation::ValidationConfig validation_config;
  if (!validation_config.ParseFromString(validation_config_string)) {
    return tensorflow::errors::InvalidArgument(
        "Failed to parse ValidationConfig");
  }

  tensorflow::metadata::v0::Anomalies anomalies;
  TF_RETURN_IF_ERROR(ValidateFeatureStatistics(
      feature_statistics, schema, may_be_environment, previous_span_statistics,
      serving_statistics, previous_version_statistics, features_needed,
      validation_config, enable_diff_regions, &anomalies));

  if (!anomalies.SerializeToString(anomalies_proto_string)) {
    return tensorflow::errors::Internal(
        "Could not serialize Anomalies output proto to string.");
  }
  return tensorflow::Status::OK();
}