in tensorflow_data_validation/anomalies/int_domain_util.cc [68:124]
// Derives the integer interval spanned by the values of a feature.
//
// Result, by feature type:
//   * STRUCT, FLOAT: absl::nullopt.
//   * INT with no string values: the [min, max] taken from the numeric
//     statistics, or the decimal rendering (std::to_string) of the first bound
//     that cannot be represented as an int64.
//   * INT with string values, BYTES, STRING: the [min, max] over all string
//     values parsed as int64; the first string that fails to parse is returned
//     instead; absl::nullopt if there are no string values.
// Any other type is a fatal error.
IntIntervalResult GetIntInterval(const FeatureStatsView& feature_stats_view) {
  // Extract string values upfront as it can be useful for categorical INT
  // features.
  const std::vector<string> string_values =
      feature_stats_view.GetStringValues();
  switch (feature_stats_view.type()) {
    case FeatureNameStatistics::STRUCT:
      return absl::nullopt;
    case FeatureNameStatistics::FLOAT:
      return absl::nullopt;
    case FeatureNameStatistics::INT: {
      if (string_values.empty()) {
        // IntDomain is interpreted as being castable to Int64, so we validate
        // that this can be done and consider as a non-conformant IntDomain if
        // it cannot. Note: if the IntDomain has no min and max specified, this
        // will not trigger an anomaly.
        //
        // The statistics are doubles. LLONG_MIN (-2^63) is exactly
        // representable as a double, but LLONG_MAX is not: it rounds up to
        // 2^63. A double equal to 2^63 therefore passes a "> LLONG_MAX" test
        // even though casting it to int64 is undefined behaviour, so the
        // bounds are spelled out explicitly and the conversion saturates.
        static constexpr double kInt64LowerBound =
            static_cast<double>(LLONG_MIN);                          // -2^63
        static constexpr double kInt64UpperBound = -kInt64LowerBound;  // 2^63

        const double stats_min = feature_stats_view.num_stats().min();
        const double stats_max = feature_stats_view.num_stats().max();

        // The negated comparisons are deliberate: they are also true for NaN,
        // which cannot be cast to int64 either.
        if (!(stats_min >= kInt64LowerBound)) {
          return std::to_string(stats_min);
        }
        if (!(stats_max <= kInt64UpperBound)) {
          return std::to_string(stats_max);
        }

        // Converts a non-NaN double to int64, clamping anything outside the
        // representable range. In particular 2^63 (what LLONG_MAX becomes when
        // stored as a double) maps to LLONG_MAX instead of overflowing.
        const auto saturating_cast = [](const double v) -> int64 {
          if (v >= kInt64UpperBound) {
            return static_cast<int64>(LLONG_MAX);
          }
          if (v <= kInt64LowerBound) {
            return static_cast<int64>(LLONG_MIN);
          }
          return static_cast<int64>(v);
        };
        return IntInterval{saturating_cast(stats_min),
                           saturating_cast(stats_max)};
      }
      // Intentionally fall through BYTES, STRING case for categorical integer
      // features.
      ABSL_FALLTHROUGH_INTENDED;
    }
    case FeatureNameStatistics::BYTES:
    case FeatureNameStatistics::STRING: {
      absl::optional<IntInterval> interval;
      for (const string& str : string_values) {
        int64 value = 0;
        if (!absl::SimpleAtoi(str, &value)) {
          // Report the first value that is not a valid int64.
          return str;
        }
        if (!interval) {
          // First parsed value seeds both ends of the interval.
          interval = IntInterval{value, value};
        }
        if (interval->min > value) {
          interval->min = value;
        }
        if (interval->max < value) {
          interval->max = value;
        }
      }
      if (interval) {
        return *interval;
      }
      // No string values at all: there is no interval to report.
      return absl::nullopt;
    }
    default:
      LOG(FATAL) << "Unknown type: " << feature_stats_view.type();
  }
}