in src/main/scala/com/amazon/deequ/analyzers/DataType.scala [116:143]
/**
 * Infers a single data type from a distribution of observed per-value types.
 *
 * The checks are applied in this order, and the first one that holds wins:
 *  1. `Unknown` when the ratio of unknown values is exactly 1.0.
 *  2. `String` when any string values were seen, or when booleans were seen together
 *     with integral or fractional values.
 *  3. `Boolean` when booleans were seen (no numbers can be present at this point).
 *  4. `Fractional` when fractional values were seen.
 *  5. `Integral` in every remaining case.
 *
 * @param dist distribution whose per-type ratios are looked up via `ratioOf`
 * @return the inferred data type
 */
def determineType(dist: Distribution): DataTypeInstances.Value = {
  import DataTypeInstances._

  // Local defs (not vals) so each ratio is only looked up when its check is reached,
  // keeping the short-circuit evaluation order of the conditions below.
  def seen(kind: DataTypeInstances.Value) = ratioOf(kind, dist) > 0.0
  def seenNumeric = seen(Integral) || seen(Fractional)

  if (ratioOf(Unknown, dist) == 1.0) {
    // Nothing but unknown values, so no decision is possible
    Unknown
  } else if (seen(String) || (seen(Boolean) && seenNumeric)) {
    // Explicit strings, or an incompatible mix of booleans and numbers, fall back to String
    String
  } else if (seen(Boolean)) {
    // Booleans without numbers (the mixed case was handled above)
    Boolean
  } else if (seen(Fractional)) {
    // A single fractional value is enough to rule out Integral
    Fractional
  } else {
    Integral
  }
}