in c++/src/ColumnReader.cc [1717:1834]
// Creates the ColumnReader implementation that matches `type` for `stripe`.
//
// When `convertToReadType` is set and the stripe's schema evolution reports
// that `type` needs conversion, the work is delegated to buildConvertReader.
// Otherwise the reader is selected from the type kind:
//   - `useTightNumericVector` picks the narrower batch type for SHORT, INT,
//     BOOLEAN, BYTE and FLOAT columns instead of the Long/Double batches.
//   - LIST, MAP, UNION and STRUCT readers receive `useTightNumericVector` and
//     `throwOnSchemaEvolutionOverflow` unchanged.
//
// Throws NotImplementedYet for a type kind or string encoding that has no
// matching reader.
std::unique_ptr<ColumnReader> buildReader(const Type& type, StripeStreams& stripe,
                                          bool useTightNumericVector,
                                          bool throwOnSchemaEvolutionOverflow,
                                          bool convertToReadType) {
  // Schema evolution is consulted only when the caller asked for conversion.
  if (convertToReadType) {
    const auto* const schemaEvolution = stripe.getSchemaEvolution();
    if (schemaEvolution && schemaEvolution->needConvert(type)) {
      return buildConvertReader(type, stripe, useTightNumericVector,
                                throwOnSchemaEvolutionOverflow);
    }
  }

  const auto typeKind = static_cast<int64_t>(type.getKind());
  switch (typeKind) {
    case SHORT: {
      if (!useTightNumericVector) {
        return std::make_unique<IntegerColumnReader<LongVectorBatch>>(type, stripe);
      }
      return std::make_unique<IntegerColumnReader<ShortVectorBatch>>(type, stripe);
    }

    case INT: {
      if (!useTightNumericVector) {
        return std::make_unique<IntegerColumnReader<LongVectorBatch>>(type, stripe);
      }
      return std::make_unique<IntegerColumnReader<IntVectorBatch>>(type, stripe);
    }

    case LONG:
    case DATE: {
      return std::make_unique<IntegerColumnReader<LongVectorBatch>>(type, stripe);
    }

    case BINARY:
    case CHAR:
    case STRING:
    case VARCHAR: {
      // String-like columns are read according to the column's encoding in
      // this stripe: dictionary or direct.
      const auto encodingKind =
          static_cast<int64_t>(stripe.getEncoding(type.getColumnId()).kind());
      switch (encodingKind) {
        case proto::ColumnEncoding_Kind_DICTIONARY:
        case proto::ColumnEncoding_Kind_DICTIONARY_V2: {
          return std::make_unique<StringDictionaryColumnReader>(type, stripe);
        }
        case proto::ColumnEncoding_Kind_DIRECT:
        case proto::ColumnEncoding_Kind_DIRECT_V2: {
          return std::make_unique<StringDirectColumnReader>(type, stripe);
        }
        default: {
          throw NotImplementedYet("buildReader unhandled string encoding");
        }
      }
    }

    case BOOLEAN: {
      if (!useTightNumericVector) {
        return std::make_unique<BooleanColumnReader<LongVectorBatch>>(type, stripe);
      }
      return std::make_unique<BooleanColumnReader<ByteVectorBatch>>(type, stripe);
    }

    case BYTE: {
      if (!useTightNumericVector) {
        return std::make_unique<ByteColumnReader<LongVectorBatch>>(type, stripe);
      }
      return std::make_unique<ByteColumnReader<ByteVectorBatch>>(type, stripe);
    }

    case LIST: {
      return std::make_unique<ListColumnReader>(type, stripe, useTightNumericVector,
                                                throwOnSchemaEvolutionOverflow);
    }

    case MAP: {
      return std::make_unique<MapColumnReader>(type, stripe, useTightNumericVector,
                                               throwOnSchemaEvolutionOverflow);
    }

    case UNION: {
      return std::make_unique<UnionColumnReader>(type, stripe, useTightNumericVector,
                                                 throwOnSchemaEvolutionOverflow);
    }

    case STRUCT: {
      return std::make_unique<StructColumnReader>(type, stripe, useTightNumericVector,
                                                  throwOnSchemaEvolutionOverflow);
    }

    case FLOAT: {
      // The reader is specialised on host endianness and on whether values
      // land in a float batch (tight) or a double batch.
      const bool littleEndian = isLittleEndian();
      if (useTightNumericVector) {
        if (littleEndian) {
          return std::make_unique<DoubleColumnReader<FLOAT, true, float, FloatVectorBatch>>(
              type, stripe);
        } else {
          return std::make_unique<DoubleColumnReader<FLOAT, false, float, FloatVectorBatch>>(
              type, stripe);
        }
      }
      if (littleEndian) {
        return std::make_unique<DoubleColumnReader<FLOAT, true, double, DoubleVectorBatch>>(
            type, stripe);
      } else {
        return std::make_unique<DoubleColumnReader<FLOAT, false, double, DoubleVectorBatch>>(
            type, stripe);
      }
    }

    case DOUBLE: {
      const bool littleEndian = isLittleEndian();
      if (littleEndian) {
        return std::make_unique<DoubleColumnReader<DOUBLE, true, double, DoubleVectorBatch>>(
            type, stripe);
      } else {
        return std::make_unique<DoubleColumnReader<DOUBLE, false, double, DoubleVectorBatch>>(
            type, stripe);
      }
    }

    case TIMESTAMP: {
      // The trailing flag is false for TIMESTAMP and true for TIMESTAMP_INSTANT.
      return std::make_unique<TimestampColumnReader>(type, stripe, false);
    }

    case TIMESTAMP_INSTANT: {
      return std::make_unique<TimestampColumnReader>(type, stripe, true);
    }

    case DECIMAL: {
      const auto precision = type.getPrecision();

      // A precision of zero marks a file written by Hive 0.11 or 0.12.
      if (precision == 0) {
        return std::make_unique<DecimalHive11ColumnReader>(type, stripe);
      }

      // Values that do not fit in an int64_t need the Int128 implementation.
      const bool fitsInInt64 = precision <= Decimal64ColumnReader::MAX_PRECISION_64;
      if (!fitsInInt64) {
        return std::make_unique<Decimal128ColumnReader>(type, stripe);
      }

      if (stripe.isDecimalAsLong()) {
        return std::make_unique<Decimal64ColumnReaderV2>(type, stripe);
      }
      return std::make_unique<Decimal64ColumnReader>(type, stripe);
    }

    default: {
      throw NotImplementedYet("buildReader unhandled type");
    }
  }
}