Status ArrowColumnWriter::Write()

in src/parquet/arrow/writer.cc [859:927]


Status ArrowColumnWriter::Write(const Array& data) {
  ::arrow::Type::type values_type;
  RETURN_NOT_OK(GetLeafType(*data.type(), &values_type));

  std::shared_ptr<Array> _values_array;
  int64_t values_offset;
  int64_t num_levels;
  int64_t num_values;
  LevelBuilder level_builder(ctx_->memory_pool);

  std::shared_ptr<Buffer> def_levels_buffer, rep_levels_buffer;
  RETURN_NOT_OK(level_builder.GenerateLevels(
      data, field_, &values_offset, &num_values, &num_levels, ctx_->def_levels_buffer,
      &def_levels_buffer, &rep_levels_buffer, &_values_array));
  const int16_t* def_levels = nullptr;
  if (def_levels_buffer) {
    def_levels = reinterpret_cast<const int16_t*>(def_levels_buffer->data());
  }
  const int16_t* rep_levels = nullptr;
  if (rep_levels_buffer) {
    rep_levels = reinterpret_cast<const int16_t*>(rep_levels_buffer->data());
  }
  std::shared_ptr<Array> values_array = _values_array->Slice(values_offset, num_values);

#define WRITE_BATCH_CASE(ArrowEnum, ArrowType, ParquetType)                            \
  case ::arrow::Type::ArrowEnum:                                                       \
    return TypedWriteBatch<ParquetType, ::arrow::ArrowType>(*values_array, num_levels, \
                                                            def_levels, rep_levels);

  switch (values_type) {
    case ::arrow::Type::UINT32: {
      if (writer_->properties()->version() == ParquetVersion::PARQUET_1_0) {
        // Parquet 1.0 reader cannot read the UINT_32 logical type. Thus we need
        // to use the larger Int64Type to store them lossless.
        return TypedWriteBatch<Int64Type, ::arrow::UInt32Type>(*values_array, num_levels,
                                                               def_levels, rep_levels);
      } else {
        return TypedWriteBatch<Int32Type, ::arrow::UInt32Type>(*values_array, num_levels,
                                                               def_levels, rep_levels);
      }
    }
      WRITE_BATCH_CASE(NA, NullType, Int32Type)
    case ::arrow::Type::TIMESTAMP:
      return WriteTimestamps(*values_array, num_levels, def_levels, rep_levels);
      WRITE_BATCH_CASE(BOOL, BooleanType, BooleanType)
      WRITE_BATCH_CASE(INT8, Int8Type, Int32Type)
      WRITE_BATCH_CASE(UINT8, UInt8Type, Int32Type)
      WRITE_BATCH_CASE(INT16, Int16Type, Int32Type)
      WRITE_BATCH_CASE(UINT16, UInt16Type, Int32Type)
      WRITE_BATCH_CASE(INT32, Int32Type, Int32Type)
      WRITE_BATCH_CASE(INT64, Int64Type, Int64Type)
      WRITE_BATCH_CASE(UINT64, UInt64Type, Int64Type)
      WRITE_BATCH_CASE(FLOAT, FloatType, FloatType)
      WRITE_BATCH_CASE(DOUBLE, DoubleType, DoubleType)
      WRITE_BATCH_CASE(BINARY, BinaryType, ByteArrayType)
      WRITE_BATCH_CASE(STRING, BinaryType, ByteArrayType)
      WRITE_BATCH_CASE(FIXED_SIZE_BINARY, FixedSizeBinaryType, FLBAType)
      WRITE_BATCH_CASE(DECIMAL, Decimal128Type, FLBAType)
      WRITE_BATCH_CASE(DATE32, Date32Type, Int32Type)
      WRITE_BATCH_CASE(DATE64, Date64Type, Int32Type)
      WRITE_BATCH_CASE(TIME32, Time32Type, Int32Type)
      WRITE_BATCH_CASE(TIME64, Time64Type, Int64Type)
    default:
      break;
  }
  std::stringstream ss;
  ss << "Data type not supported as list value: " << values_array->type()->ToString();
  return Status::NotImplemented(ss.str());
}