std::unique_ptr<ColumnWriter> ColumnWriter::create()

in velox/dwio/dwrf/writer/ColumnWriter.cpp [1933:2024]


/// Factory that builds the column writer matching the kind of `type`.
///
/// Scalar kinds map to a single writer. ROW, MAP and ARRAY writers are built
/// recursively: one child writer is created per child of `type` and appended
/// to the parent's `children_`. Child writers are created from `context`, the
/// child type and `sequence` only; `onRecordPosition` is not passed on to them.
///
/// A MAP that is a direct child of the root node (parent id 0) is created via
/// FlatMapColumnWriter when Config::FLATTEN_MAP is set and its column is listed
/// in Config::MAP_FLAT_COLS. Every other MAP gets a regular MapColumnWriter.
///
/// @param context Writer context; supplies the configuration and is handed to
///     every writer created here.
/// @param type Type node to create a writer for.
/// @param sequence Passed unchanged to the writer for `type` and to all of its
///     children.
/// @param onRecordPosition Callback handed to the writer created for `type`.
///     Must be empty when the flat map writer is selected.
/// @return The writer created for `type`.
///
/// Errors are reported through DWIO_RAISE / DWIO_ENSURE when:
///  - a top-level column listed in MAP_FLAT_COLS is not of MAP type,
///  - a MAP does not have exactly two children or an ARRAY exactly one,
///  - `onRecordPosition` is set for a map that is about to be flattened,
///  - the type kind has no writer.
std::unique_ptr<ColumnWriter> ColumnWriter::create(
    WriterContext& context,
    const TypeWithId& type,
    const uint32_t sequence,
    std::function<void(IndexBuilder&)> onRecordPosition) {
  const auto flatMapEnabled = context.getConfig(Config::FLATTEN_MAP);
  const auto& flatMapCols = context.getConfig(Config::MAP_FLAT_COLS);
  const auto kind = type.type->kind();

  // True when flattening is enabled and this node is a top level column
  // (direct child of the root node) that is listed in MAP_FLAT_COLS. Computed
  // once so that the validation below and the MAP case cannot drift apart.
  const bool isConfiguredFlatMapCol = flatMapEnabled &&
      type.parent != nullptr && type.parent->id == 0 &&
      std::find(flatMapCols.begin(), flatMapCols.end(), type.column) !=
          flatMapCols.end();

  // When flat map is enabled, all columns provided in the MAP_FLAT_COLS config
  // must be of MAP type. Only top level columns are checked.
  if (isConfiguredFlatMapCol && kind != TypeKind::MAP) {
    DWIO_RAISE(fmt::format(
        "MAP_FLAT_COLS contains column {}, but the root type of this column is {}."
        " Column root types must be of type MAP",
        type.column,
        mapTypeKindToName(kind)));
  }

  switch (kind) {
    case TypeKind::BOOLEAN:
      return std::make_unique<ByteRleColumnWriter<bool>>(
          context, type, sequence, &createBooleanRleEncoder, onRecordPosition);
    case TypeKind::TINYINT:
      return std::make_unique<ByteRleColumnWriter<int8_t>>(
          context, type, sequence, &createByteRleEncoder, onRecordPosition);
    case TypeKind::SMALLINT:
      return std::make_unique<IntegerColumnWriter<int16_t>>(
          context, type, sequence, onRecordPosition);
    case TypeKind::INTEGER:
      return std::make_unique<IntegerColumnWriter<int32_t>>(
          context, type, sequence, onRecordPosition);
    case TypeKind::BIGINT:
      return std::make_unique<IntegerColumnWriter<int64_t>>(
          context, type, sequence, onRecordPosition);
    case TypeKind::REAL:
      return std::make_unique<FloatColumnWriter<float>>(
          context, type, sequence, onRecordPosition);
    case TypeKind::DOUBLE:
      return std::make_unique<FloatColumnWriter<double>>(
          context, type, sequence, onRecordPosition);
    case TypeKind::VARCHAR:
      return std::make_unique<StringColumnWriter>(
          context, type, sequence, onRecordPosition);
    case TypeKind::VARBINARY:
      return std::make_unique<BinaryColumnWriter>(
          context, type, sequence, onRecordPosition);
    case TypeKind::TIMESTAMP:
      return std::make_unique<TimestampColumnWriter>(
          context, type, sequence, onRecordPosition);
    case TypeKind::ROW: {
      auto ret = std::make_unique<StructColumnWriter>(
          context, type, sequence, onRecordPosition);
      // One child writer per field, in field order.
      ret->children_.reserve(type.size());
      for (int32_t i = 0; i < type.size(); ++i) {
        ret->children_.push_back(create(context, *type.childAt(i), sequence));
      }
      return ret;
    }
    case TypeKind::MAP: {
      DWIO_ENSURE_EQ(type.size(), 2, "Map should have exactly two children");

      // We only flatten maps which are direct children of the root node.
      // All other (nested) maps are treated as regular maps.
      if (isConfiguredFlatMapCol) {
        DWIO_ENSURE(!onRecordPosition, "unexpected flat map nesting");
        return FlatMapColumnWriter<TypeKind::INVALID>::create(
            context, type, sequence);
      }
      auto ret = std::make_unique<MapColumnWriter>(
          context, type, sequence, onRecordPosition);
      // Child 0 and child 1 of the map type, in that order.
      ret->children_.push_back(create(context, *type.childAt(0), sequence));
      ret->children_.push_back(create(context, *type.childAt(1), sequence));
      return ret;
    }
    case TypeKind::ARRAY: {
      DWIO_ENSURE_EQ(type.size(), 1, "Array should have exactly one child");
      auto ret = std::make_unique<ListColumnWriter>(
          context, type, sequence, onRecordPosition);
      ret->children_.push_back(create(context, *type.childAt(0), sequence));
      return ret;
    }
    default:
      DWIO_RAISE("not supported yet ", mapTypeKindToName(kind));
  }
}