in tensorflow_data_validation/anomalies/schema.cc [726:910]
// Checks the parts of `feature` that can be validated without looking at any
// data, and repairs them in place.
//
// Each problem found is fixed directly on `feature` (by clearing or clamping
// the offending field, clearing the domain, or deprecating the feature) and
// reported as one entry of the returned vector. A feature that is already
// deprecated is returned untouched with no descriptions.
//
// Args:
//   feature: the schema feature to validate; mutated in place.
//
// Returns:
//   One Description per repair that was applied, in the order the checks ran.
std::vector<Description> Schema::UpdateFeatureSelf(Feature* feature) {
  std::vector<Description> descriptions;
  if (::tensorflow::data_validation::FeatureIsDeprecated(*feature)) {
    return descriptions;
  }

  // --- Name and type: without these the feature cannot be used at all. ---
  if (!feature->has_name()) {
    descriptions.push_back(
        {tensorflow::metadata::v0::AnomalyInfo::FEATURE_MISSING_NAME,
         "unspecified name (maybe meant to be the empty string): find "
         "name rather than deprecating."});
    // Deprecating the feature is the only possible "fix" here.
    ::tensorflow::data_validation::DeprecateFeature(feature);
    return descriptions;
  }
  if (!feature->has_type()) {
    if (feature->has_domain() || feature->has_string_domain()) {
      // A (string) domain is present, so the type is inferred as BYTES.
      descriptions.push_back(
          {tensorflow::metadata::v0::AnomalyInfo::FEATURE_MISSING_TYPE,
           "unspecified type: inferring the type to "
           "be BYTES, given the domain specified."});
      feature->set_type(tensorflow::metadata::v0::BYTES);
    } else {
      descriptions.push_back(
          {tensorflow::metadata::v0::AnomalyInfo::FEATURE_MISSING_TYPE,
           "unspecified type: determine the type and "
           "set it, rather than deprecating."});
      // Deprecating the feature is the only possible "fix" here.
      ::tensorflow::data_validation::DeprecateFeature(feature);
      return descriptions;
    }
  }

  // --- Presence: min_fraction is kept within [0, 1]. ---
  if (feature->presence().min_fraction() < 0.0) {
    feature->mutable_presence()->clear_min_fraction();
    descriptions.push_back(
        {tensorflow::metadata::v0::AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
         "min_fraction should not be negative: clear is equal to zero"});
  }
  if (feature->presence().min_fraction() > 1.0) {
    feature->mutable_presence()->set_min_fraction(1.0);
    // NOTE(review): the message below is missing "be"; it is kept verbatim
    // because callers/tests may match on the exact text.
    descriptions.push_back(
        {tensorflow::metadata::v0::AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
         "min_fraction should not greater than 1"});
  }

  // --- ValueCount: min must be non-negative and max must be >= min. ---
  if (feature->value_count().min() < 0) {
    feature->mutable_value_count()->clear_min();
    descriptions.push_back(
        {tensorflow::metadata::v0::AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
         "ValueCount.min should not be negative"});
  }
  if (feature->value_count().has_max() &&
      feature->value_count().max() < feature->value_count().min()) {
    descriptions.push_back(
        {tensorflow::metadata::v0::AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
         "ValueCount.max should not be less than min"});
    feature->mutable_value_count()->set_max(feature->value_count().min());
  }

  // --- ValueCounts: the same two rules, applied to every nesting level. ---
  for (int i = 0; i < feature->value_counts().value_count_size(); ++i) {
    if (feature->value_counts().value_count(i).min() < 0) {
      feature->mutable_value_counts()->mutable_value_count(i)->clear_min();
      descriptions.push_back(
          {tensorflow::metadata::v0::AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
           "ValueCounts.min should not be negative",
           absl::StrCat("ValueCounts.min at level ", i,
                        " should not be negative.")});
    }
    if (feature->value_counts().value_count(i).has_max() &&
        feature->value_counts().value_count(i).max() <
            feature->value_counts().value_count(i).min()) {
      feature->mutable_value_counts()->mutable_value_count(i)->set_max(
          feature->value_counts().value_count(i).min());
      descriptions.push_back(
          {tensorflow::metadata::v0::AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
           "ValueCounts.max should not be less than min",
           absl::StrCat("ValueCounts.max at level ", i,
                        " should not be less than min.")});
    }
  }

  // --- Shape: a single non-positive dimension invalidates the whole shape. ---
  // The scan finishes before the shape is cleared so that the container is
  // never modified while it is being iterated.
  bool has_non_positive_dim = false;
  for (const auto& dim : feature->shape().dim()) {
    if (dim.size() <= 0) {
      has_non_positive_dim = true;
      break;
    }
  }
  if (has_non_positive_dim) {
    feature->clear_shape();
    descriptions.push_back(
        {tensorflow::metadata::v0::AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
         "Shape.dim.size must be a positive integer"});
  }

  // --- Domain vs. type compatibility. ---
  if (!ContainsKey(AllowedFeatureTypes(feature->domain_info_case()),
                   feature->type())) {
    descriptions.push_back(
        {tensorflow::metadata::v0::AnomalyInfo::DOMAIN_INVALID_FOR_TYPE,
         "The domain does not match the type",
         absl::StrCat(
             "The domain \"", GetDomainInfoName(*feature),
             "\" does not match the type: ",
             tensorflow::metadata::v0::FeatureType_Name(feature->type()))});
    // Note that this clears the oneof field domain_info.
    ::tensorflow::data_validation::ClearDomain(feature);
  }

  // Removes distribution_constraints from the feature, if present, and
  // records `reason` as the short description of the repair.
  const auto drop_distribution_constraints = [&](const char* reason) {
    if (feature->has_distribution_constraints()) {
      feature->clear_distribution_constraints();
      descriptions.push_back(
          {tensorflow::metadata::v0::AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
           reason});
    }
  };

  // --- Per-domain checks (evaluated after the compatibility fix above, so a
  // domain that was just cleared is seen as DOMAIN_INFO_NOT_SET). ---
  switch (feature->domain_info_case()) {
    case Feature::kDomain:
      if (GetExistingStringDomain(feature->domain()) == nullptr) {
        // Build the message BEFORE clearing: once the field is cleared,
        // feature->domain() no longer yields the name, and the report would
        // read "missing domain: " with nothing after it.
        descriptions.push_back(
            {tensorflow::metadata::v0::AnomalyInfo::
                 INVALID_DOMAIN_SPECIFICATION,
             absl::StrCat("missing domain: ", feature->domain())});
        // Note that this clears the oneof field domain_info.
        feature->clear_domain();
      }
      break;
    case tensorflow::metadata::v0::Feature::kBoolDomain:
      drop_distribution_constraints(
          "distribution constraints not supported for bool domains.");
      UpdateBoolDomainSelf(feature->mutable_bool_domain());
      break;
    case tensorflow::metadata::v0::Feature::kIntDomain:
      drop_distribution_constraints(
          "distribution constraints not supported for int domains.");
      break;
    case tensorflow::metadata::v0::Feature::kFloatDomain:
      drop_distribution_constraints(
          "distribution constraints not supported for float domains.");
      break;
    case tensorflow::metadata::v0::Feature::kStringDomain:
      // Distribution constraints are left in place for string domains.
      UpdateStringDomainSelf(feature->mutable_string_domain());
      break;
    case tensorflow::metadata::v0::Feature::kStructDomain:
      drop_distribution_constraints(
          "distribution constraints not supported for struct domains.");
      break;
    case Feature::kNaturalLanguageDomain:
    case Feature::kImageDomain:
    case Feature::kMidDomain:
    case Feature::kUrlDomain:
    case Feature::kTimeDomain:
      drop_distribution_constraints(
          "distribution constraints not supported for semantic domains.");
      break;
    case Feature::DOMAIN_INFO_NOT_SET:
      drop_distribution_constraints(
          "distribution constraints require domain or string domain.");
      break;
    default:
      descriptions.push_back(
          {tensorflow::metadata::v0::AnomalyInfo::INVALID_DOMAIN_SPECIFICATION,
           "internal issue: unknown domain_info type"});
      // Note that this clears the oneof field domain_info.
      ::tensorflow::data_validation::ClearDomain(feature);
  }
  return descriptions;
}