std::vector<Description> Schema::UpdateFeatureSelf(Feature* feature)

in tensorflow_data_validation/anomalies/schema.cc [726:910]


// Checks a single feature's schema entry for self-consistency and repairs it
// in place. Only the feature itself is inspected; no statistics are involved.
//
// The checks run in this order:
//   1. A deprecated feature is left untouched and yields no descriptions.
//   2. A feature with no name is deprecated and processing stops.
//   3. A feature with no type gets BYTES if it has a (string) domain;
//      otherwise it is deprecated and processing stops.
//   4. Out-of-range presence.min_fraction, value_count and value_counts
//      bounds are cleared or clamped.
//   5. A shape containing a non-positive dimension is cleared.
//   6. A domain that is not allowed for the feature's type is cleared.
//   7. Domain-specific checks run, including dropping distribution
//      constraints for domain kinds that do not support them.
//
// Args:
//   feature: the feature to check; mutated in place. It is dereferenced
//     unconditionally, so it must not be null.
//
// Returns:
//   One Description for each repair applied directly by this function, in
//   the order the repairs were made.
std::vector<Description> Schema::UpdateFeatureSelf(Feature* feature) {
  using tensorflow::metadata::v0::AnomalyInfo;

  std::vector<Description> descriptions;
  if (::tensorflow::data_validation::FeatureIsDeprecated(*feature)) {
    return descriptions;
  }

  // Drops distribution constraints when the current domain kind cannot honor
  // them, and records `reason` as the explanation. No-op if none are set.
  const auto drop_unsupported_constraints = [&](const char* reason) {
    if (!feature->has_distribution_constraints()) {
      return;
    }
    feature->clear_distribution_constraints();
    descriptions.push_back(
        {AnomalyInfo::INVALID_SCHEMA_SPECIFICATION, reason});
  };

  if (!feature->has_name()) {
    descriptions.push_back(
        {AnomalyInfo::FEATURE_MISSING_NAME,
         "unspecified name (maybe meant to be the empty string): find "
         "name rather than deprecating."});
    // Deprecating the feature is the only possible "fix" here.
    ::tensorflow::data_validation::DeprecateFeature(feature);
    return descriptions;
  }

  if (!feature->has_type()) {
    if (feature->has_domain() || feature->has_string_domain()) {
      // A string domain (named or inline) is enough to infer the type.
      descriptions.push_back(
          {AnomalyInfo::FEATURE_MISSING_TYPE,
           "unspecified type: inferring the type to "
           "be BYTES, given the domain specified."});
      feature->set_type(tensorflow::metadata::v0::BYTES);
    } else {
      descriptions.push_back(
          {AnomalyInfo::FEATURE_MISSING_TYPE,
           "unspecified type: determine the type and "
           "set it, rather than deprecating."});
      // Deprecating the feature is the only possible "fix" here.
      ::tensorflow::data_validation::DeprecateFeature(feature);
      return descriptions;
    }
  }

  // presence.min_fraction must lie in [0, 1].
  if (feature->presence().min_fraction() < 0.0) {
    feature->mutable_presence()->clear_min_fraction();
    descriptions.push_back(
        {AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
         "min_fraction should not be negative: clear is equal to zero"});
  }
  if (feature->presence().min_fraction() > 1.0) {
    feature->mutable_presence()->set_min_fraction(1.0);
    descriptions.push_back(
        {AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
         "min_fraction should not greater than 1"});
  }

  // value_count: repair a negative min first, so that the max check below
  // compares against the already-repaired min.
  if (feature->value_count().min() < 0) {
    feature->mutable_value_count()->clear_min();
    descriptions.push_back(
        {AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
         "ValueCount.min should not be negative"});
  }
  if (feature->value_count().has_max() &&
      feature->value_count().max() < feature->value_count().min()) {
    descriptions.push_back(
        {AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
         "ValueCount.max should not be less than min"});
    feature->mutable_value_count()->set_max(feature->value_count().min());
  }

  // value_counts: the same two repairs, applied independently at every
  // nestedness level.
  for (int i = 0; i < feature->value_counts().value_count_size(); ++i) {
    if (feature->value_counts().value_count(i).min() < 0) {
      feature->mutable_value_counts()->mutable_value_count(i)->clear_min();
      descriptions.push_back(
          {AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
           "ValueCounts.min should not be negative",
           absl::StrCat("ValueCounts.min at level ", i,
                        " should not be negative.")});
    }
    if (feature->value_counts().value_count(i).has_max() &&
        feature->value_counts().value_count(i).max() <
            feature->value_counts().value_count(i).min()) {
      feature->mutable_value_counts()->mutable_value_count(i)->set_max(
          feature->value_counts().value_count(i).min());
      descriptions.push_back(
          {AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
           "ValueCounts.max should not be less than min",
           absl::StrCat("ValueCounts.max at level ", i,
                        " should not be less than min.")});
    }
  }

  // Scan the shape first and clear it only after the loop has finished, so
  // the shape is never destroyed while its dimensions are being iterated.
  bool has_non_positive_dim = false;
  for (const auto& dim : feature->shape().dim()) {
    if (dim.size() <= 0) {
      has_non_positive_dim = true;
      break;
    }
  }
  if (has_non_positive_dim) {
    feature->clear_shape();
    descriptions.push_back(
        {AnomalyInfo::INVALID_SCHEMA_SPECIFICATION,
         "Shape.dim.size must be a positive integer"});
  }

  if (!ContainsKey(AllowedFeatureTypes(feature->domain_info_case()),
                   feature->type())) {
    // The description must be built before ClearDomain(), since it reports
    // the name of the domain that is about to be removed.
    descriptions.push_back(
        {AnomalyInfo::DOMAIN_INVALID_FOR_TYPE,
         "The domain does not match the type",
         absl::StrCat(
             "The domain \"", GetDomainInfoName(*feature),
             "\" does not match the type: ",
             tensorflow::metadata::v0::FeatureType_Name(feature->type()))});
    // Note that this clears the oneof field domain_info.
    ::tensorflow::data_validation::ClearDomain(feature);
  }

  switch (feature->domain_info_case()) {
    case Feature::kDomain:
      if (GetExistingStringDomain(feature->domain()) == nullptr) {
        // Record the dangling domain name BEFORE clearing it: clear_domain()
        // resets the oneof, after which feature->domain() no longer holds the
        // name and the message would lose it.
        descriptions.push_back(
            {AnomalyInfo::INVALID_DOMAIN_SPECIFICATION,
             absl::StrCat("missing domain: ", feature->domain())});
        // Note that this clears the oneof field domain_info.
        feature->clear_domain();
      }
      break;
    case Feature::kBoolDomain:
      drop_unsupported_constraints(
          "distribution constraints not supported for bool domains.");
      // NOTE(review): whatever UpdateBoolDomainSelf returns is not used here,
      // so any fix it reports would not reach `descriptions` -- confirm this
      // is intended.
      UpdateBoolDomainSelf(feature->mutable_bool_domain());
      break;
    case Feature::kIntDomain:
      drop_unsupported_constraints(
          "distribution constraints not supported for int domains.");
      break;
    case Feature::kFloatDomain:
      drop_unsupported_constraints(
          "distribution constraints not supported for float domains.");
      break;
    case Feature::kStringDomain:
      // Distribution constraints are left in place for string domains.
      // NOTE(review): whatever UpdateStringDomainSelf returns is not used
      // here -- confirm this is intended.
      UpdateStringDomainSelf(feature->mutable_string_domain());
      break;
    case Feature::kStructDomain:
      drop_unsupported_constraints(
          "distribution constraints not supported for struct domains.");
      break;
    case Feature::kNaturalLanguageDomain:
    case Feature::kImageDomain:
    case Feature::kMidDomain:
    case Feature::kUrlDomain:
    case Feature::kTimeDomain:
      drop_unsupported_constraints(
          "distribution constraints not supported for semantic domains.");
      break;
    case Feature::DOMAIN_INFO_NOT_SET:
      drop_unsupported_constraints(
          "distribution constraints require domain or string domain.");
      break;
    default:
      // A domain kind this function does not know about: report it and drop
      // the domain.
      descriptions.push_back(
          {AnomalyInfo::INVALID_DOMAIN_SPECIFICATION,
           "internal issue: unknown domain_info type"});
      // Note that this clears the oneof field domain_info.
      ::tensorflow::data_validation::ClearDomain(feature);
  }

  return descriptions;
}