in src/parquet/arrow/writer.cc [859:927]
// Write a single Arrow array through this column writer.
//
// Steps performed:
//   1. Resolve the leaf value type of `data` via GetLeafType.
//   2. Ask LevelBuilder::GenerateLevels for the definition / repetition level
//      buffers, the leaf values array, and the offset / count of the leaf
//      values to write.
//   3. Slice the leaf values array to [values_offset, values_offset + num_values).
//   4. Dispatch on the leaf type to the matching TypedWriteBatch
//      instantiation (or WriteTimestamps for TIMESTAMP).
//
// Returns the Status of the first failing helper, the Status of the typed
// write, or Status::NotImplemented when the leaf type has no case below.
Status ArrowColumnWriter::Write(const Array& data) {
  ::arrow::Type::type values_type;
  RETURN_NOT_OK(GetLeafType(*data.type(), &values_type));

  // Out-parameters of GenerateLevels. The counters are zero-initialised so
  // they never hold indeterminate values.
  std::shared_ptr<Array> leaf_values;
  int64_t values_offset = 0;
  int64_t num_levels = 0;
  int64_t num_values = 0;
  LevelBuilder level_builder(ctx_->memory_pool);

  std::shared_ptr<Buffer> def_levels_buffer, rep_levels_buffer;
  RETURN_NOT_OK(level_builder.GenerateLevels(
      data, field_, &values_offset, &num_values, &num_levels, ctx_->def_levels_buffer,
      &def_levels_buffer, &rep_levels_buffer, &leaf_values));

  // A level pointer stays nullptr when GenerateLevels produced no buffer for it.
  // NOTE(review): presumably that means the column needs no levels of that
  // kind -- confirm against LevelBuilder.
  const int16_t* def_levels = nullptr;
  if (def_levels_buffer) {
    def_levels = reinterpret_cast<const int16_t*>(def_levels_buffer->data());
  }
  const int16_t* rep_levels = nullptr;
  if (rep_levels_buffer) {
    rep_levels = reinterpret_cast<const int16_t*>(rep_levels_buffer->data());
  }

  // Only the window reported by GenerateLevels is handed to the typed writers.
  std::shared_ptr<Array> values_array = leaf_values->Slice(values_offset, num_values);

// Expands to one `case` label that returns the result of the matching
// TypedWriteBatch instantiation. It refers to the locals declared above, so it
// is only meaningful inside this function and is undefined again after the
// switch.
#define WRITE_BATCH_CASE(ArrowEnum, ArrowType, ParquetType)                            \
  case ::arrow::Type::ArrowEnum:                                                       \
    return TypedWriteBatch<ParquetType, ::arrow::ArrowType>(*values_array, num_levels, \
                                                            def_levels, rep_levels);

  // Every case returns, so there is no fall-through between cases; only an
  // unhandled type reaches the error path below the switch.
  switch (values_type) {
    case ::arrow::Type::UINT32: {
      if (writer_->properties()->version() == ParquetVersion::PARQUET_1_0) {
        // Parquet 1.0 reader cannot read the UINT_32 logical type. Thus we need
        // to use the larger Int64Type to store them lossless.
        return TypedWriteBatch<Int64Type, ::arrow::UInt32Type>(*values_array, num_levels,
                                                               def_levels, rep_levels);
      } else {
        return TypedWriteBatch<Int32Type, ::arrow::UInt32Type>(*values_array, num_levels,
                                                               def_levels, rep_levels);
      }
    }
      WRITE_BATCH_CASE(NA, NullType, Int32Type)
    case ::arrow::Type::TIMESTAMP:
      return WriteTimestamps(*values_array, num_levels, def_levels, rep_levels);
      WRITE_BATCH_CASE(BOOL, BooleanType, BooleanType)
      WRITE_BATCH_CASE(INT8, Int8Type, Int32Type)
      WRITE_BATCH_CASE(UINT8, UInt8Type, Int32Type)
      WRITE_BATCH_CASE(INT16, Int16Type, Int32Type)
      WRITE_BATCH_CASE(UINT16, UInt16Type, Int32Type)
      WRITE_BATCH_CASE(INT32, Int32Type, Int32Type)
      WRITE_BATCH_CASE(INT64, Int64Type, Int64Type)
      WRITE_BATCH_CASE(UINT64, UInt64Type, Int64Type)
      WRITE_BATCH_CASE(FLOAT, FloatType, FloatType)
      WRITE_BATCH_CASE(DOUBLE, DoubleType, DoubleType)
      // STRING goes through the same BinaryType instantiation as BINARY.
      WRITE_BATCH_CASE(BINARY, BinaryType, ByteArrayType)
      WRITE_BATCH_CASE(STRING, BinaryType, ByteArrayType)
      WRITE_BATCH_CASE(FIXED_SIZE_BINARY, FixedSizeBinaryType, FLBAType)
      WRITE_BATCH_CASE(DECIMAL, Decimal128Type, FLBAType)
      WRITE_BATCH_CASE(DATE32, Date32Type, Int32Type)
      // NOTE(review): DATE64 is written as Int32Type; the value conversion is
      // presumably done in the TypedWriteBatch specialisation -- confirm.
      WRITE_BATCH_CASE(DATE64, Date64Type, Int32Type)
      WRITE_BATCH_CASE(TIME32, Time32Type, Int32Type)
      WRITE_BATCH_CASE(TIME64, Time64Type, Int64Type)
    default:
      break;
  }

// Keep the helper macro from leaking into the rest of the translation unit.
#undef WRITE_BATCH_CASE

  std::stringstream ss;
  ss << "Data type not supported as list value: " << values_array->type()->ToString();
  return Status::NotImplemented(ss.str());
}