std::unique_ptr<ColumnReader> buildReader()

in c++/src/ColumnReader.cc [1717:1834]


  std::unique_ptr<ColumnReader> buildReader(const Type& type, StripeStreams& stripe,
                                            bool useTightNumericVector,
                                            bool throwOnSchemaEvolutionOverflow,
                                            bool convertToReadType) {
    if (convertToReadType && stripe.getSchemaEvolution() &&
        stripe.getSchemaEvolution()->needConvert(type)) {
      return buildConvertReader(type, stripe, useTightNumericVector,
                                throwOnSchemaEvolutionOverflow);
    }

    switch (static_cast<int64_t>(type.getKind())) {
      case SHORT:
        if (useTightNumericVector) {
          return std::make_unique<IntegerColumnReader<ShortVectorBatch>>(type, stripe);
        }
        return std::make_unique<IntegerColumnReader<LongVectorBatch>>(type, stripe);
      case INT:
        if (useTightNumericVector) {
          return std::make_unique<IntegerColumnReader<IntVectorBatch>>(type, stripe);
        }
        return std::make_unique<IntegerColumnReader<LongVectorBatch>>(type, stripe);
      case LONG:
      case DATE:
        return std::make_unique<IntegerColumnReader<LongVectorBatch>>(type, stripe);
      case BINARY:
      case CHAR:
      case STRING:
      case VARCHAR:
        switch (static_cast<int64_t>(stripe.getEncoding(type.getColumnId()).kind())) {
          case proto::ColumnEncoding_Kind_DICTIONARY:
          case proto::ColumnEncoding_Kind_DICTIONARY_V2:
            return std::make_unique<StringDictionaryColumnReader>(type, stripe);
          case proto::ColumnEncoding_Kind_DIRECT:
          case proto::ColumnEncoding_Kind_DIRECT_V2:
            return std::make_unique<StringDirectColumnReader>(type, stripe);
          default:
            throw NotImplementedYet("buildReader unhandled string encoding");
        }

      case BOOLEAN: {
        if (useTightNumericVector) {
          return std::make_unique<BooleanColumnReader<ByteVectorBatch>>(type, stripe);
        } else {
          return std::make_unique<BooleanColumnReader<LongVectorBatch>>(type, stripe);
        }
      }

      case BYTE:
        if (useTightNumericVector) {
          return std::make_unique<ByteColumnReader<ByteVectorBatch>>(type, stripe);
        }
        return std::make_unique<ByteColumnReader<LongVectorBatch>>(type, stripe);

      case LIST:
        return std::make_unique<ListColumnReader>(type, stripe, useTightNumericVector,
                                                  throwOnSchemaEvolutionOverflow);

      case MAP:
        return std::make_unique<MapColumnReader>(type, stripe, useTightNumericVector,
                                                 throwOnSchemaEvolutionOverflow);

      case UNION:
        return std::make_unique<UnionColumnReader>(type, stripe, useTightNumericVector,
                                                   throwOnSchemaEvolutionOverflow);

      case STRUCT:
        return std::make_unique<StructColumnReader>(type, stripe, useTightNumericVector,
                                                    throwOnSchemaEvolutionOverflow);

      case FLOAT: {
        if (useTightNumericVector) {
          if (isLittleEndian()) {
            return std::make_unique<DoubleColumnReader<FLOAT, true, float, FloatVectorBatch>>(
                type, stripe);
          }
          return std::make_unique<DoubleColumnReader<FLOAT, false, float, FloatVectorBatch>>(
              type, stripe);
        }
        if (isLittleEndian()) {
          return std::make_unique<DoubleColumnReader<FLOAT, true, double, DoubleVectorBatch>>(
              type, stripe);
        }
        return std::make_unique<DoubleColumnReader<FLOAT, false, double, DoubleVectorBatch>>(
            type, stripe);
      }
      case DOUBLE: {
        if (isLittleEndian()) {
          return std::make_unique<DoubleColumnReader<DOUBLE, true, double, DoubleVectorBatch>>(
              type, stripe);
        }
        return std::make_unique<DoubleColumnReader<DOUBLE, false, double, DoubleVectorBatch>>(
            type, stripe);
      }
      case TIMESTAMP:
        return std::make_unique<TimestampColumnReader>(type, stripe, false);

      case TIMESTAMP_INSTANT:
        return std::make_unique<TimestampColumnReader>(type, stripe, true);

      case DECIMAL:
        // is this a Hive 0.11 or 0.12 file?
        if (type.getPrecision() == 0) {
          return std::make_unique<DecimalHive11ColumnReader>(type, stripe);
        }
        // can we represent the values using int64_t?
        if (type.getPrecision() <= Decimal64ColumnReader::MAX_PRECISION_64) {
          if (stripe.isDecimalAsLong()) {
            return std::make_unique<Decimal64ColumnReaderV2>(type, stripe);
          }
          return std::make_unique<Decimal64ColumnReader>(type, stripe);
        }
        // otherwise we use the Int128 implementation
        return std::make_unique<Decimal128ColumnReader>(type, stripe);

      default:
        throw NotImplementedYet("buildReader unhandled type");
    }
  }