TypeHolder SmallestTypeFor()

in cpp/src/arrow/compute/expression.cc [385:519]


// Returns the narrowest type that can represent the scalar held by `value`
// without changing what it denotes:
//   - integers: the smallest width of the same signedness containing the value;
//   - double: float32 when the value is non-finite or round-trips through float;
//   - large_utf8 / large_binary: the 32-bit offset variant when the payload
//     size fits in int32;
//   - timestamp: the coarsest unit that divides the value exactly (the
//     timezone is carried over);
//   - anything else: the type of `value`, unchanged.
//
// `value` is read through scalar_as<>(), so it is expected to hold a scalar.
TypeHolder SmallestTypeFor(const arrow::Datum& value) {
  switch (value.type()->id()) {
    case Type::INT8:
      return int8();
    case Type::UINT8:
      return uint8();
    case Type::INT16: {
      const int16_t i16 = value.scalar_as<Int16Scalar>().value;
      if (i16 <= std::numeric_limits<int8_t>::max() &&
          i16 >= std::numeric_limits<int8_t>::min()) {
        return int8();
      }
      return int16();
    }
    case Type::UINT16: {
      const uint16_t ui16 = value.scalar_as<UInt16Scalar>().value;
      if (ui16 <= std::numeric_limits<uint8_t>::max()) {
        return uint8();
      }
      return uint16();
    }
    case Type::INT32: {
      const int32_t i32 = value.scalar_as<Int32Scalar>().value;
      if (i32 <= std::numeric_limits<int8_t>::max() &&
          i32 >= std::numeric_limits<int8_t>::min()) {
        return int8();
      }
      if (i32 <= std::numeric_limits<int16_t>::max() &&
          i32 >= std::numeric_limits<int16_t>::min()) {
        return int16();
      }
      return int32();
    }
    case Type::UINT32: {
      const uint32_t ui32 = value.scalar_as<UInt32Scalar>().value;
      if (ui32 <= std::numeric_limits<uint8_t>::max()) {
        return uint8();
      }
      if (ui32 <= std::numeric_limits<uint16_t>::max()) {
        return uint16();
      }
      return uint32();
    }
    case Type::INT64: {
      const int64_t i64 = value.scalar_as<Int64Scalar>().value;
      if (i64 <= std::numeric_limits<int8_t>::max() &&
          i64 >= std::numeric_limits<int8_t>::min()) {
        return int8();
      }
      if (i64 <= std::numeric_limits<int16_t>::max() &&
          i64 >= std::numeric_limits<int16_t>::min()) {
        return int16();
      }
      if (i64 <= std::numeric_limits<int32_t>::max() &&
          i64 >= std::numeric_limits<int32_t>::min()) {
        return int32();
      }
      return int64();
    }
    case Type::UINT64: {
      const uint64_t ui64 = value.scalar_as<UInt64Scalar>().value;
      if (ui64 <= std::numeric_limits<uint8_t>::max()) {
        return uint8();
      }
      if (ui64 <= std::numeric_limits<uint16_t>::max()) {
        return uint16();
      }
      if (ui64 <= std::numeric_limits<uint32_t>::max()) {
        return uint32();
      }
      return uint64();
    }
    case Type::DOUBLE: {
      const double doub = value.scalar_as<DoubleScalar>().value;
      if (!std::isfinite(doub)) {
        // Special values can be float
        return float32();
      }
      // A finite double outside float's range cannot be converted to float
      // without undefined behavior, and could never round-trip anyway.
      if (std::fabs(doub) > std::numeric_limits<float>::max()) {
        return float64();
      }
      // Test if float representation is the same
      if (static_cast<double>(static_cast<float>(doub)) == doub) {
        return float32();
      }
      return float64();
    }
    case Type::LARGE_STRING: {
      const auto& buffer = value.scalar_as<LargeStringScalar>().value;
      // Guard against an unset buffer pointer (presumably the case for null
      // scalars): there is no size to measure, so leave the type alone.
      if (buffer == nullptr) {
        return value.type();
      }
      if (buffer->size() <= std::numeric_limits<int32_t>::max()) {
        return utf8();
      }
      return large_utf8();
    }
    case Type::LARGE_BINARY: {
      const auto& buffer = value.scalar_as<LargeBinaryScalar>().value;
      // Same guard as for LARGE_STRING: never dereference an unset buffer.
      if (buffer == nullptr) {
        return value.type();
      }
      if (buffer->size() <= std::numeric_limits<int32_t>::max()) {
        return binary();
      }
      return large_binary();
    }
    case Type::TIMESTAMP: {
      const auto& ts_type = checked_pointer_cast<TimestampType>(value.type());
      // Keep the value signed. Stored in an unsigned integer, a negative
      // timestamp wraps modulo 2^64, which is not a multiple of 1000, so the
      // divisibility tests below would give wrong answers (e.g. -616 would
      // look like a multiple of 1000 and -1000 would not). With a signed
      // operand the remainder is zero exactly for true multiples.
      const int64_t ts = value.scalar_as<TimestampScalar>().value;
      switch (ts_type->unit()) {
        case TimeUnit::SECOND:
          return value.type();
        case TimeUnit::MILLI:
          if (ts % 1000 == 0) {
            return timestamp(TimeUnit::SECOND, ts_type->timezone());
          }
          return value.type();
        case TimeUnit::MICRO:
          if (ts % 1000000 == 0) {
            return timestamp(TimeUnit::SECOND, ts_type->timezone());
          }
          if (ts % 1000 == 0) {
            return timestamp(TimeUnit::MILLI, ts_type->timezone());
          }
          return value.type();
        case TimeUnit::NANO:
          if (ts % 1000000000 == 0) {
            return timestamp(TimeUnit::SECOND, ts_type->timezone());
          }
          if (ts % 1000000 == 0) {
            return timestamp(TimeUnit::MILLI, ts_type->timezone());
          }
          if (ts % 1000 == 0) {
            return timestamp(TimeUnit::MICRO, ts_type->timezone());
          }
          return value.type();
        default:
          return value.type();
      }
    }
    default:
      return value.type();
  }
}