public static Object readFromVector()

in hudi-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java [476:603]


  /**
   * Reads the value at {@code vectorPos} out of an ORC {@link ColumnVector} and converts it to
   * the Avro in-memory representation required by {@code avroSchema} (matching what
   * AvroRecordConverter would produce, e.g. {@code ByteBuffer} for BINARY).
   *
   * @param type       ORC type description of the column being read.
   * @param colVector  ORC column vector holding the batch of values.
   * @param avroSchema target Avro schema; if it is a UNION, the actual (non-null) member is used.
   * @param vectorPos  row position within the vector batch.
   * @return the converted value, or {@code null} when the cell is null.
   * @throws HoodieIOException if the ORC category is unrecognized, a CHAR/VARCHAR value exceeds
   *                           its declared max length, or a DECIMAL column lacks a matching
   *                           Avro decimal logical type / base type.
   */
  public static Object readFromVector(TypeDescription type, ColumnVector colVector, Schema avroSchema, int vectorPos) {

    // A repeating vector stores one value at position 0 that applies to every row in the batch.
    if (colVector.isRepeating) {
      vectorPos = 0;
    }

    if (colVector.isNull[vectorPos]) {
      return null;
    }

    // Resolve a UNION schema down to its actual member type (e.g. ["null","long"] -> "long").
    // NOTE: getActualSchemaType may return null; avroSchema is null-checked below accordingly.
    if (avroSchema.getType().equals(Schema.Type.UNION)) {
      avroSchema = getActualSchemaType(avroSchema);
    }
    LogicalType logicalType = avroSchema != null ? avroSchema.getLogicalType() : null;

    switch (type.getCategory()) {
      case BOOLEAN:
        return ((LongColumnVector) colVector).vector[vectorPos] != 0;
      case BYTE:
        return (byte) ((LongColumnVector) colVector).vector[vectorPos];
      case SHORT:
        return (short) ((LongColumnVector) colVector).vector[vectorPos];
      case INT:
        return (int) ((LongColumnVector) colVector).vector[vectorPos];
      case LONG:
        return ((LongColumnVector) colVector).vector[vectorPos];
      case FLOAT:
        return (float) ((DoubleColumnVector) colVector).vector[vectorPos];
      case DOUBLE:
        return ((DoubleColumnVector) colVector).vector[vectorPos];
      case VARCHAR:
      case CHAR:
        int maxLength = type.getMaxLength();
        String result = ((BytesColumnVector) colVector).toString(vectorPos);
        if (result.length() <= maxLength) {
          return result;
        } else {
          throw new HoodieIOException("CHAR/VARCHAR has length " + result.length() + " greater than Max Length allowed");
        }
      case STRING:
        String stringType = avroSchema.getProp(GenericData.STRING_PROP);
        // FIX: the "avro.java.string" prop is a String; the previous comparison
        // stringType.equals(StringType.String) tested a String against the enum constant,
        // which is always false, making the java.lang.String branch unreachable.
        if (stringType == null || !stringType.equals(StringType.String.name())) {
          int stringLength = ((BytesColumnVector) colVector).length[vectorPos];
          int stringOffset = ((BytesColumnVector) colVector).start[vectorPos];
          byte[] stringBytes = new byte[stringLength];
          System.arraycopy(((BytesColumnVector) colVector).vector[vectorPos], stringOffset, stringBytes, 0, stringLength);
          return new Utf8(stringBytes);
        } else {
          return ((BytesColumnVector) colVector).toString(vectorPos);
        }
      case DATE:
        // ORC stores dates as days since epoch, matching Avro's LogicalTypes.Date representation.
        return (int) ((LongColumnVector) colVector).vector[vectorPos];
      case TIMESTAMP:
        // ORC keeps (millis, nanos-of-milli); convert to the precision the logical type expects.
        long time = ((TimestampColumnVector) colVector).time[vectorPos];
        int nanos = ((TimestampColumnVector) colVector).nanos[vectorPos];
        if (logicalType instanceof LogicalTypes.TimestampMillis) {
          return time;
        } else if (logicalType instanceof LogicalTypes.TimestampMicros) {
          return time * MICROS_PER_MILLI + nanos / NANOS_PER_MICRO;
        } else {
          return ((TimestampColumnVector) colVector).getTimestampAsLong(vectorPos);
        }
      case BINARY:
        int binaryLength = ((BytesColumnVector) colVector).length[vectorPos];
        int binaryOffset = ((BytesColumnVector) colVector).start[vectorPos];
        byte[] binaryBytes = new byte[binaryLength];
        System.arraycopy(((BytesColumnVector) colVector).vector[vectorPos], binaryOffset, binaryBytes, 0, binaryLength);
        // Return a ByteBuffer to be consistent with AvroRecordConverter.
        return ByteBuffer.wrap(binaryBytes);
      case DECIMAL:
        // FIX: guard the cast; a missing/mismatched logical type previously surfaced as an
        // unhelpful NullPointerException/ClassCastException instead of a HoodieIOException.
        if (!(logicalType instanceof LogicalTypes.Decimal)) {
          throw new HoodieIOException("Avro schema for an ORC DECIMAL column must carry a DECIMAL logical type, but was: " + logicalType);
        }
        // HiveDecimal always drops trailing zeros, implicitly changing the scale,
        // therefore the schema's scale must be re-applied here.
        BigDecimal bigDecimal = ((DecimalColumnVector) colVector).vector[vectorPos]
            .getHiveDecimal().bigDecimalValue()
            .setScale(((LogicalTypes.Decimal) logicalType).getScale());
        Schema.Type baseType = avroSchema.getType();
        if (baseType.equals(Schema.Type.FIXED)) {
          return new Conversions.DecimalConversion().toFixed(bigDecimal, avroSchema, logicalType);
        } else if (baseType.equals(Schema.Type.BYTES)) {
          return bigDecimal.unscaledValue().toByteArray();
        } else {
          // FIX: added the missing space after the type name in the message.
          throw new HoodieIOException(baseType.getName() + " is not a valid type for LogicalTypes.DECIMAL.");
        }
      case LIST:
        ArrayList<Object> list = new ArrayList<>();
        ListColumnVector listVector = (ListColumnVector) colVector;
        int listLength = (int) listVector.lengths[vectorPos];
        int listOffset = (int) listVector.offsets[vectorPos];
        list.ensureCapacity(listLength);
        TypeDescription childType = type.getChildren().get(0);
        for (int i = 0; i < listLength; i++) {
          list.add(readFromVector(childType, listVector.child, avroSchema.getElementType(), listOffset + i));
        }
        return list;
      case MAP:
        Map<String, Object> map = new HashMap<String, Object>();
        MapColumnVector mapVector = (MapColumnVector) colVector;
        int mapLength = (int) mapVector.lengths[vectorPos];
        int mapOffset = (int) mapVector.offsets[vectorPos];
        // Map keys are always strings in Avro.
        Schema keySchema = Schema.create(Schema.Type.STRING);
        for (int i = 0; i < mapLength; i++) {
          map.put(
              readFromVector(type.getChildren().get(0), mapVector.keys, keySchema, i + mapOffset).toString(),
              readFromVector(type.getChildren().get(1), mapVector.values,
                  avroSchema.getValueType(), i + mapOffset));
        }
        return map;
      case STRUCT:
        StructColumnVector structVector = (StructColumnVector) colVector;
        List<TypeDescription> children = type.getChildren();
        GenericData.Record record = new GenericData.Record(avroSchema);
        for (int i = 0; i < children.size(); i++) {
          record.put(i, readFromVector(children.get(i), structVector.fields[i],
              avroSchema.getFields().get(i).schema(), vectorPos));
        }
        return record;
      case UNION:
        UnionColumnVector unionVector = (UnionColumnVector) colVector;
        // The ORC union tag selects the active branch; the Avro union is assumed to list its
        // member schemas in the same order — TODO(review) confirm against the schema mapping.
        int tag = unionVector.tags[vectorPos];
        ColumnVector fieldVector = unionVector.fields[tag];
        return readFromVector(type.getChildren().get(tag), fieldVector, avroSchema.getTypes().get(tag), vectorPos);
      default:
        throw new HoodieIOException("Unrecognized TypeDescription " + type.toString());
    }
  }