IntIntervalResult GetIntInterval()

in tensorflow_data_validation/anomalies/int_domain_util.cc [68:124]


// Computes the interval of integer values observed for a feature.
//
// Depending on the feature type and its statistics, the result is built from
// one of:
//   * an IntInterval {min, max} covering all observed values;
//   * a string holding the first value found that cannot be represented as an
//     int64 (an out-of-range numeric min/max, or a string value that does not
//     parse as an integer);
//   * absl::nullopt when no interval can be derived (STRUCT and FLOAT
//     features, or BYTES/STRING features without any string values).
//
// INT features that carry string values are treated like BYTES/STRING
// features (categorical integers). Any other feature type is a fatal error.
IntIntervalResult GetIntInterval(const FeatureStatsView& feature_stats_view) {
  // Extract string values upfront as it can be useful for categorical INT
  // features.
  const std::vector<string> string_values =
      feature_stats_view.GetStringValues();
  switch (feature_stats_view.type()) {
    case FeatureNameStatistics::STRUCT:
      return absl::nullopt;
    case FeatureNameStatistics::FLOAT:
      return absl::nullopt;
    case FeatureNameStatistics::INT: {
      if (string_values.empty()) {
        // IntDomain is interpreted as being castable to Int64, so we validate
        // that this can be done and consider as a non-conformant IntDomain if
        // it cannot. Note: if the IntDomain has no min and max specified, this
        // will not trigger an anomaly.
        //
        // The bounds are expressed as doubles that are exactly representable:
        // -2^63 (== LLONG_MIN) is the smallest castable value, and 2^63 is the
        // first value that is NOT castable. Comparing against LLONG_MAX
        // directly would be wrong: LLONG_MAX (2^63 - 1) rounds up to 2^63 when
        // converted to double, so a max of exactly 2^63 would slip past a
        // strict `>` check and the cast below would be undefined behavior.
        constexpr double kInt64LowerBound = static_cast<double>(LLONG_MIN);
        constexpr double kInt64UpperBoundExclusive = -kInt64LowerBound;

        const double min_value = feature_stats_view.num_stats().min();
        const double max_value = feature_stats_view.num_stats().max();
        // NOTE(review): a NaN min/max passes both checks below and still
        // reaches the cast; confirm whether the statistics can ever be NaN.
        if (min_value < kInt64LowerBound) {
          return std::to_string(min_value);
        }
        if (max_value >= kInt64UpperBoundExclusive) {
          return std::to_string(max_value);
        }
        return IntInterval{static_cast<int64>(min_value),
                           static_cast<int64>(max_value)};
      }
      // Intentionally fall through BYTES, STRING case for categorical integer
      // features.
      ABSL_FALLTHROUGH_INTENDED;
    }
    case FeatureNameStatistics::BYTES:
    case FeatureNameStatistics::STRING: {
      // Running [min, max] over all parsed values; stays empty when there are
      // no string values at all.
      absl::optional<IntInterval> interval;
      for (const string& str : string_values) {
        int64 value;
        if (!absl::SimpleAtoi(str, &value)) {
          // Report the first value that is not a valid integer.
          return str;
        }
        if (!interval) {
          // First value seeds both ends of the interval.
          interval = IntInterval{value, value};
        } else if (value < interval->min) {
          interval->min = value;
        } else if (value > interval->max) {
          interval->max = value;
        }
      }
      if (interval) {
        return *interval;
      }
      return absl::nullopt;
    }
    default:
      LOG(FATAL) << "Unknown type: " << feature_stats_view.type();
  }
}