static Status GetPandasWriterType()

in python/pyarrow/src/arrow/python/arrow_to_pandas.cc [2078:2247]


// Select the PandasWriter kind used to convert `data`, based on the Arrow type
// id of `data` and on the conversion `options`.
//
// \param[in] data the chunked array to convert; only its type and null count
//   are inspected here
// \param[in] options conversion options; this function reads
//   integer_object_nulls, date_as_object, timestamp_as_object,
//   coerce_temporal_nanoseconds and to_numpy
// \param[out] output_type receives the selected writer type on success
// \return Status::OK() on success; Status::NotImplemented if `data` is a
//   list-like type whose value type is rejected by ListTypeSupported(), or if
//   its type id is not handled by the switch below
static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions& options,
                                  PandasWriter::type* output_type) {
  // Integer data keeps its own writer only when there are no nulls; with nulls
  // it is written as objects (if integer_object_nulls is set) or as doubles.
  const auto integer_writer_type =
      [&](PandasWriter::type no_nulls_type) -> PandasWriter::type {
    if (data.null_count() > 0) {
      return options.integer_object_nulls ? PandasWriter::OBJECT : PandasWriter::DOUBLE;
    }
    return no_nulls_type;
  };

  switch (data.type()->id()) {
    case Type::BOOL:
      // Booleans with nulls are written as objects
      *output_type = data.null_count() > 0 ? PandasWriter::OBJECT : PandasWriter::BOOL;
      break;
    case Type::UINT8:
      *output_type = integer_writer_type(PandasWriter::UINT8);
      break;
    case Type::INT8:
      *output_type = integer_writer_type(PandasWriter::INT8);
      break;
    case Type::UINT16:
      *output_type = integer_writer_type(PandasWriter::UINT16);
      break;
    case Type::INT16:
      *output_type = integer_writer_type(PandasWriter::INT16);
      break;
    case Type::UINT32:
      *output_type = integer_writer_type(PandasWriter::UINT32);
      break;
    case Type::INT32:
      *output_type = integer_writer_type(PandasWriter::INT32);
      break;
    case Type::UINT64:
      *output_type = integer_writer_type(PandasWriter::UINT64);
      break;
    case Type::INT64:
      *output_type = integer_writer_type(PandasWriter::INT64);
      break;
    case Type::HALF_FLOAT:
      *output_type = PandasWriter::HALF_FLOAT;
      break;
    case Type::FLOAT:
      *output_type = PandasWriter::FLOAT;
      break;
    case Type::DOUBLE:
      *output_type = PandasWriter::DOUBLE;
      break;
    // Every type in this group is handled by the object writer
    case Type::STRING:
    case Type::LARGE_STRING:
    case Type::STRING_VIEW:
    case Type::BINARY:
    case Type::LARGE_BINARY:
    case Type::BINARY_VIEW:
    case Type::NA:
    case Type::FIXED_SIZE_BINARY:
    case Type::STRUCT:
    case Type::TIME32:
    case Type::TIME64:
    case Type::DECIMAL32:
    case Type::DECIMAL64:
    case Type::DECIMAL128:
    case Type::DECIMAL256:
    case Type::INTERVAL_MONTH_DAY_NANO:
      *output_type = PandasWriter::OBJECT;
      break;
    case Type::DATE32:
      if (options.date_as_object) {
        *output_type = PandasWriter::OBJECT;
      } else if (options.coerce_temporal_nanoseconds) {
        *output_type = PandasWriter::DATETIME_NANO;
      } else if (options.to_numpy) {
        // Numpy supports Day, but Pandas does not
        *output_type = PandasWriter::DATETIME_DAY;
      } else {
        *output_type = PandasWriter::DATETIME_MILLI;
      }
      break;
    case Type::DATE64:
      if (options.date_as_object) {
        *output_type = PandasWriter::OBJECT;
      } else if (options.coerce_temporal_nanoseconds) {
        *output_type = PandasWriter::DATETIME_NANO;
      } else {
        *output_type = PandasWriter::DATETIME_MILLI;
      }
      break;
    case Type::TIMESTAMP: {
      const auto& ts_type = checked_cast<const TimestampType&>(*data.type());
      if (options.timestamp_as_object && ts_type.unit() != TimeUnit::NANO) {
        // Nanoseconds are never out of bounds for pandas, so in that case
        // we don't convert to object
        *output_type = PandasWriter::OBJECT;
        break;
      }
      const bool has_timezone = !ts_type.timezone().empty();
      // Coercion forces the nanosecond writer regardless of the stored unit
      const auto effective_unit =
          options.coerce_temporal_nanoseconds ? TimeUnit::NANO : ts_type.unit();
      switch (effective_unit) {
        case TimeUnit::SECOND:
          *output_type = has_timezone ? PandasWriter::DATETIME_SECOND_TZ
                                      : PandasWriter::DATETIME_SECOND;
          break;
        case TimeUnit::MILLI:
          *output_type = has_timezone ? PandasWriter::DATETIME_MILLI_TZ
                                      : PandasWriter::DATETIME_MILLI;
          break;
        case TimeUnit::MICRO:
          *output_type = has_timezone ? PandasWriter::DATETIME_MICRO_TZ
                                      : PandasWriter::DATETIME_MICRO;
          break;
        case TimeUnit::NANO:
          *output_type =
              has_timezone ? PandasWriter::DATETIME_NANO_TZ : PandasWriter::DATETIME_NANO;
          break;
      }
    } break;
    case Type::DURATION: {
      const auto& dur_type = checked_cast<const DurationType&>(*data.type());
      // Coercion forces the nanosecond writer regardless of the stored unit
      const auto effective_unit =
          options.coerce_temporal_nanoseconds ? TimeUnit::NANO : dur_type.unit();
      switch (effective_unit) {
        case TimeUnit::SECOND:
          *output_type = PandasWriter::TIMEDELTA_SECOND;
          break;
        case TimeUnit::MILLI:
          *output_type = PandasWriter::TIMEDELTA_MILLI;
          break;
        case TimeUnit::MICRO:
          *output_type = PandasWriter::TIMEDELTA_MICRO;
          break;
        case TimeUnit::NANO:
          *output_type = PandasWriter::TIMEDELTA_NANO;
          break;
      }
    } break;
    case Type::FIXED_SIZE_LIST:
    case Type::LIST:
    case Type::LARGE_LIST:
    case Type::LIST_VIEW:
    case Type::LARGE_LIST_VIEW:
    case Type::MAP: {
      // Borrow the type by reference; no shared_ptr copy is needed just to
      // look at the value type.
      const auto& list_type = checked_cast<const BaseListType&>(*data.type());
      if (!ListTypeSupported(*list_type.value_type())) {
        return Status::NotImplemented("Not implemented type for Arrow list to pandas: ",
                                      list_type.value_type()->ToString());
      }
      *output_type = PandasWriter::OBJECT;
    } break;
    case Type::DICTIONARY:
      *output_type = PandasWriter::CATEGORICAL;
      break;
    case Type::EXTENSION:
      *output_type = PandasWriter::EXTENSION;
      break;
    default:
      return Status::NotImplemented(
          "No known equivalent Pandas block for Arrow data of type ",
          data.type()->ToString(), " is known.");
  }
  return Status::OK();
}