in python/pyarrow/src/arrow/python/arrow_to_pandas.cc [2078:2247]
/// \brief Select the pandas writer kind used to convert a chunked array.
///
/// The choice depends on the Arrow type id of `data`, on whether `data`
/// contains nulls (consulted for boolean and integer types only), and on the
/// conversion flags in `options`.
///
/// \param[in] data chunked array whose type and null count are inspected
/// \param[in] options conversion flags; the ones read here are
///   integer_object_nulls, date_as_object, timestamp_as_object,
///   coerce_temporal_nanoseconds and to_numpy
/// \param[out] output_type receives the selected writer kind on success
/// \return Status::OK on success; Status::NotImplemented when the type id has
///   no case in the switch below, or when a list-like type has a value type
///   rejected by ListTypeSupported
static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions& options,
                                  PandasWriter::type* output_type) {
  // Integer columns keep their own writer kind only when they hold no nulls.
  // With nulls present they are written as objects when
  // options.integer_object_nulls is set, and as doubles otherwise.
  const auto integer_writer_type =
      [&data, &options](PandasWriter::type native_type) -> PandasWriter::type {
    if (data.null_count() > 0) {
      return options.integer_object_nulls ? PandasWriter::OBJECT : PandasWriter::DOUBLE;
    }
    return native_type;
  };

  switch (data.type()->id()) {
    case Type::BOOL:
      // Booleans with nulls are always written as objects.
      *output_type = data.null_count() > 0 ? PandasWriter::OBJECT : PandasWriter::BOOL;
      break;
    case Type::UINT8:
      *output_type = integer_writer_type(PandasWriter::UINT8);
      break;
    case Type::INT8:
      *output_type = integer_writer_type(PandasWriter::INT8);
      break;
    case Type::UINT16:
      *output_type = integer_writer_type(PandasWriter::UINT16);
      break;
    case Type::INT16:
      *output_type = integer_writer_type(PandasWriter::INT16);
      break;
    case Type::UINT32:
      *output_type = integer_writer_type(PandasWriter::UINT32);
      break;
    case Type::INT32:
      *output_type = integer_writer_type(PandasWriter::INT32);
      break;
    case Type::UINT64:
      *output_type = integer_writer_type(PandasWriter::UINT64);
      break;
    case Type::INT64:
      *output_type = integer_writer_type(PandasWriter::INT64);
      break;
    case Type::HALF_FLOAT:
      *output_type = PandasWriter::HALF_FLOAT;
      break;
    case Type::FLOAT:
      *output_type = PandasWriter::FLOAT;
      break;
    case Type::DOUBLE:
      *output_type = PandasWriter::DOUBLE;
      break;
    // Every label in this group shares the single OBJECT assignment below.
    case Type::STRING:
    case Type::LARGE_STRING:
    case Type::STRING_VIEW:
    case Type::BINARY:
    case Type::LARGE_BINARY:
    case Type::BINARY_VIEW:
    case Type::NA:
    case Type::FIXED_SIZE_BINARY:
    case Type::STRUCT:
    case Type::TIME32:
    case Type::TIME64:
    case Type::DECIMAL32:
    case Type::DECIMAL64:
    case Type::DECIMAL128:
    case Type::DECIMAL256:
    case Type::INTERVAL_MONTH_DAY_NANO:
      *output_type = PandasWriter::OBJECT;
      break;
    case Type::DATE32:
      if (options.date_as_object) {
        *output_type = PandasWriter::OBJECT;
      } else if (options.coerce_temporal_nanoseconds) {
        *output_type = PandasWriter::DATETIME_NANO;
      } else if (options.to_numpy) {
        // Numpy supports Day, but Pandas does not
        *output_type = PandasWriter::DATETIME_DAY;
      } else {
        *output_type = PandasWriter::DATETIME_MILLI;
      }
      break;
    case Type::DATE64:
      if (options.date_as_object) {
        *output_type = PandasWriter::OBJECT;
      } else if (options.coerce_temporal_nanoseconds) {
        *output_type = PandasWriter::DATETIME_NANO;
      } else {
        *output_type = PandasWriter::DATETIME_MILLI;
      }
      break;
    case Type::TIMESTAMP: {
      const auto& ts_type = checked_cast<const TimestampType&>(*data.type());
      // An empty timezone string selects the non-"_TZ" writer kinds.
      const bool has_timezone = !ts_type.timezone().empty();
      if (options.timestamp_as_object && ts_type.unit() != TimeUnit::NANO) {
        // Nanoseconds are never out of bounds for pandas, so in that case
        // we don't convert to object
        *output_type = PandasWriter::OBJECT;
      } else if (options.coerce_temporal_nanoseconds) {
        *output_type =
            has_timezone ? PandasWriter::DATETIME_NANO_TZ : PandasWriter::DATETIME_NANO;
      } else if (has_timezone) {
        // Keep the unit of the Arrow type.
        switch (ts_type.unit()) {
          case TimeUnit::SECOND:
            *output_type = PandasWriter::DATETIME_SECOND_TZ;
            break;
          case TimeUnit::MILLI:
            *output_type = PandasWriter::DATETIME_MILLI_TZ;
            break;
          case TimeUnit::MICRO:
            *output_type = PandasWriter::DATETIME_MICRO_TZ;
            break;
          case TimeUnit::NANO:
            *output_type = PandasWriter::DATETIME_NANO_TZ;
            break;
        }
      } else {
        // Keep the unit of the Arrow type.
        switch (ts_type.unit()) {
          case TimeUnit::SECOND:
            *output_type = PandasWriter::DATETIME_SECOND;
            break;
          case TimeUnit::MILLI:
            *output_type = PandasWriter::DATETIME_MILLI;
            break;
          case TimeUnit::MICRO:
            *output_type = PandasWriter::DATETIME_MICRO;
            break;
          case TimeUnit::NANO:
            *output_type = PandasWriter::DATETIME_NANO;
            break;
        }
      }
    } break;
    case Type::DURATION: {
      const auto& dur_type = checked_cast<const DurationType&>(*data.type());
      if (options.coerce_temporal_nanoseconds) {
        *output_type = PandasWriter::TIMEDELTA_NANO;
      } else {
        // Keep the unit of the Arrow type.
        switch (dur_type.unit()) {
          case TimeUnit::SECOND:
            *output_type = PandasWriter::TIMEDELTA_SECOND;
            break;
          case TimeUnit::MILLI:
            *output_type = PandasWriter::TIMEDELTA_MILLI;
            break;
          case TimeUnit::MICRO:
            *output_type = PandasWriter::TIMEDELTA_MICRO;
            break;
          case TimeUnit::NANO:
            *output_type = PandasWriter::TIMEDELTA_NANO;
            break;
        }
      }
    } break;
    case Type::FIXED_SIZE_LIST:
    case Type::LIST:
    case Type::LARGE_LIST:
    case Type::LIST_VIEW:
    case Type::LARGE_LIST_VIEW:
    case Type::MAP: {
      // All list-like types are inspected through the BaseListType interface.
      // The type is only borrowed here, so a reference is enough and no
      // additional shared_ptr owner is created.
      const auto& list_type = checked_cast<const BaseListType&>(*data.type());
      const auto& value_type = list_type.value_type();
      if (!ListTypeSupported(*value_type)) {
        return Status::NotImplemented("Not implemented type for Arrow list to pandas: ",
                                      value_type->ToString());
      }
      *output_type = PandasWriter::OBJECT;
    } break;
    case Type::DICTIONARY:
      *output_type = PandasWriter::CATEGORICAL;
      break;
    case Type::EXTENSION:
      *output_type = PandasWriter::EXTENSION;
      break;
    default:
      return Status::NotImplemented(
          "No known equivalent Pandas block for Arrow data of type ",
          data.type()->ToString(), " is known.");
  }
  return Status::OK();
}