in velox/dwio/dwrf/writer/ColumnWriter.cpp [1933:2024]
// Factory: builds the column writer that corresponds to the kind of `type`.
//
// - Scalar kinds map one-to-one onto a concrete writer; `onRecordPosition` is
//   forwarded to that writer's constructor.
// - ROW / MAP / ARRAY writers get their child writers created recursively with
//   the same `sequence`; the children are created without a position callback.
// - A MAP that is a direct child of the root node (parent id 0), is listed in
//   MAP_FLAT_COLS, and is written while FLATTEN_MAP is on, is delegated to
//   FlatMapColumnWriter::create instead. In that case `onRecordPosition` must
//   be empty.
//
// Raises (via DWIO_RAISE / DWIO_ENSURE*) when a non-MAP top level column is
// listed in MAP_FLAT_COLS while FLATTEN_MAP is on, when a MAP / ARRAY node has
// an unexpected number of children, or when the kind has no writer.
std::unique_ptr<ColumnWriter> ColumnWriter::create(
    WriterContext& context,
    const TypeWithId& type,
    const uint32_t sequence,
    std::function<void(IndexBuilder&)> onRecordPosition) {
  const auto kind = type.type->kind();
  const auto flatMapEnabled = context.getConfig(Config::FLATTEN_MAP);
  const auto& flatMapCols = context.getConfig(Config::MAP_FLAT_COLS);

  // Only top level columns (direct children of the root node) are considered
  // for flattening; nested maps are always written as regular maps.
  const bool isTopLevelColumn =
      type.parent != nullptr && type.parent->id == 0;
  const bool flatMapRequested = flatMapEnabled && isTopLevelColumn &&
      std::find(flatMapCols.begin(), flatMapCols.end(), type.column) !=
          flatMapCols.end();

  // Every column named in MAP_FLAT_COLS has to be a MAP at its root.
  if (flatMapRequested && kind != TypeKind::MAP) {
    DWIO_RAISE(fmt::format(
        "MAP_FLAT_COLS contains column {}, but the root type of this column is {}."
        " Column root types must be of type MAP",
        type.column,
        mapTypeKindToName(kind)));
  }

  // Creates the writer for the child at `childIdx`; no position callback is
  // handed down to children.
  const auto makeChild = [&](int32_t childIdx) {
    return create(context, *type.childAt(childIdx), sequence);
  };

  switch (kind) {
    case TypeKind::BOOLEAN:
      return std::make_unique<ByteRleColumnWriter<bool>>(
          context, type, sequence, &createBooleanRleEncoder, onRecordPosition);

    case TypeKind::TINYINT:
      return std::make_unique<ByteRleColumnWriter<int8_t>>(
          context, type, sequence, &createByteRleEncoder, onRecordPosition);

    case TypeKind::SMALLINT:
      return std::make_unique<IntegerColumnWriter<int16_t>>(
          context, type, sequence, onRecordPosition);

    case TypeKind::INTEGER:
      return std::make_unique<IntegerColumnWriter<int32_t>>(
          context, type, sequence, onRecordPosition);

    case TypeKind::BIGINT:
      return std::make_unique<IntegerColumnWriter<int64_t>>(
          context, type, sequence, onRecordPosition);

    case TypeKind::REAL:
      return std::make_unique<FloatColumnWriter<float>>(
          context, type, sequence, onRecordPosition);

    case TypeKind::DOUBLE:
      return std::make_unique<FloatColumnWriter<double>>(
          context, type, sequence, onRecordPosition);

    case TypeKind::VARCHAR:
      return std::make_unique<StringColumnWriter>(
          context, type, sequence, onRecordPosition);

    case TypeKind::VARBINARY:
      return std::make_unique<BinaryColumnWriter>(
          context, type, sequence, onRecordPosition);

    case TypeKind::TIMESTAMP:
      return std::make_unique<TimestampColumnWriter>(
          context, type, sequence, onRecordPosition);

    case TypeKind::ROW: {
      auto structWriter = std::make_unique<StructColumnWriter>(
          context, type, sequence, onRecordPosition);
      const auto fieldCount = type.size();
      structWriter->children_.reserve(fieldCount);
      for (int32_t fieldIdx = 0; fieldIdx < fieldCount; ++fieldIdx) {
        structWriter->children_.push_back(makeChild(fieldIdx));
      }
      return structWriter;
    }

    case TypeKind::MAP: {
      DWIO_ENSURE_EQ(type.size(), 2, "Map should have exactly two children");
      if (flatMapRequested) {
        // The flat map writer is created without a position callback, so one
        // must not have been supplied by the caller.
        DWIO_ENSURE(!onRecordPosition, "unexpected flat map nesting");
        return FlatMapColumnWriter<TypeKind::INVALID>::create(
            context, type, sequence);
      }
      auto mapWriter = std::make_unique<MapColumnWriter>(
          context, type, sequence, onRecordPosition);
      // Child 0 first, then child 1 (presumably keys, then values).
      mapWriter->children_.push_back(makeChild(0));
      mapWriter->children_.push_back(makeChild(1));
      return mapWriter;
    }

    case TypeKind::ARRAY: {
      DWIO_ENSURE_EQ(type.size(), 1, "Array should have exactly one child");
      auto listWriter = std::make_unique<ListColumnWriter>(
          context, type, sequence, onRecordPosition);
      listWriter->children_.push_back(makeChild(0));
      return listWriter;
    }

    default:
      DWIO_RAISE("not supported yet ", mapTypeKindToName(kind));
  }
}