public static void addToVector()

in hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java [87:349]


  public static void addToVector(TypeDescription type, ColumnVector colVector, Schema avroSchema, Object value, int vectorPos) {

    final int currentVecLength = colVector.isNull.length;
    if (vectorPos >= currentVecLength) {
      colVector.ensureSize(2 * currentVecLength, true);
    }
    if (value == null) {
      colVector.isNull[vectorPos] = true;
      colVector.noNulls = false;
      return;
    }

    if (avroSchema.getType().equals(Schema.Type.UNION)) {
      avroSchema = getActualSchemaType(avroSchema);
    }

    LogicalType logicalType = avroSchema != null ? avroSchema.getLogicalType() : null;

    switch (type.getCategory()) {
      case BOOLEAN:
        LongColumnVector boolVec = (LongColumnVector) colVector;
        boolVec.vector[vectorPos] = (boolean) value ? 1 : 0;
        break;
      case BYTE:
        LongColumnVector byteColVec = (LongColumnVector) colVector;
        byteColVec.vector[vectorPos] = (byte) value;
        break;
      case SHORT:
        LongColumnVector shortColVec = (LongColumnVector) colVector;
        shortColVec.vector[vectorPos] = (short) value;
        break;
      case INT:
        // the Avro logical type could be AvroTypeUtil.LOGICAL_TYPE_TIME_MILLIS, but we will ignore that fact here
        // since Orc has no way to represent a time in the way Avro defines it; we will simply preserve the int value
        LongColumnVector intColVec = (LongColumnVector) colVector;
        intColVec.vector[vectorPos] = (int) value;
        break;
      case LONG:
        // the Avro logical type could be AvroTypeUtil.LOGICAL_TYPE_TIME_MICROS, but we will ignore that fact here
        // since Orc has no way to represent a time in the way Avro defines it; we will simply preserve the long value
        LongColumnVector longColVec = (LongColumnVector) colVector;
        longColVec.vector[vectorPos] = (long) value;
        break;
      case FLOAT:
        DoubleColumnVector floatColVec = (DoubleColumnVector) colVector;
        floatColVec.vector[vectorPos] = (float) value;
        break;
      case DOUBLE:
        DoubleColumnVector doubleColVec = (DoubleColumnVector) colVector;
        doubleColVec.vector[vectorPos] = (double) value;
        break;
      case VARCHAR:
      case CHAR:
      case STRING:
        BytesColumnVector bytesColVec = (BytesColumnVector) colVector;
        byte[] bytes = null;

        if (value instanceof String) {
          bytes = getUTF8Bytes((String) value);
        } else if (value instanceof Utf8) {
          final Utf8 utf8 = (Utf8) value;
          bytes = utf8.getBytes();
        } else if (value instanceof GenericData.EnumSymbol) {
          bytes = getUTF8Bytes(value.toString());
        } else {
          throw new IllegalStateException(String.format(
              "Unrecognized type for Avro %s field value, which has type %s, value %s",
              type.getCategory().getName(),
              value.getClass().getName(),
              value
          ));
        }

        if (bytes == null) {
          bytesColVec.isNull[vectorPos] = true;
          bytesColVec.noNulls = false;
        } else {
          bytesColVec.setRef(vectorPos, bytes, 0, bytes.length);
        }
        break;
      case DATE:
        LongColumnVector dateColVec = (LongColumnVector) colVector;
        int daysSinceEpoch;
        if (logicalType instanceof LogicalTypes.Date) {
          daysSinceEpoch = (int) value;
        } else if (value instanceof java.sql.Date) {
          daysSinceEpoch = DateWritable.dateToDays((java.sql.Date) value);
        } else if (value instanceof Date) {
          daysSinceEpoch = DateWritable.millisToDays(((Date) value).getTime());
        } else {
          throw new IllegalStateException(String.format(
              "Unrecognized type for Avro DATE field value, which has type %s, value %s",
              value.getClass().getName(),
              value
          ));
        }
        dateColVec.vector[vectorPos] = daysSinceEpoch;
        break;
      case TIMESTAMP:
        TimestampColumnVector tsColVec = (TimestampColumnVector) colVector;

        long time;
        int nanos = 0;

        // The unit for Timestamp in ORC is millis, convert timestamp to millis if needed
        if (logicalType instanceof LogicalTypes.TimestampMillis) {
          time = (long) value;
        } else if (logicalType instanceof LogicalTypes.TimestampMicros) {
          final long logicalTsValue = (long) value;
          time = logicalTsValue / MICROS_PER_MILLI;
          nanos = NANOS_PER_MICRO * ((int) (logicalTsValue % MICROS_PER_MILLI));
        } else if (value instanceof Timestamp) {
          Timestamp tsValue = (Timestamp) value;
          time = tsValue.getTime();
          nanos = tsValue.getNanos();
        } else if (value instanceof java.sql.Date) {
          java.sql.Date sqlDateValue = (java.sql.Date) value;
          time = sqlDateValue.getTime();
        } else if (value instanceof Date) {
          Date dateValue = (Date) value;
          time = dateValue.getTime();
        } else {
          throw new IllegalStateException(String.format(
              "Unrecognized type for Avro TIMESTAMP field value, which has type %s, value %s",
              value.getClass().getName(),
              value
          ));
        }

        tsColVec.time[vectorPos] = time;
        tsColVec.nanos[vectorPos] = nanos;
        break;
      case BINARY:
        BytesColumnVector binaryColVec = (BytesColumnVector) colVector;

        byte[] binaryBytes;
        if (value instanceof GenericData.Fixed) {
          binaryBytes = ((GenericData.Fixed)value).bytes();
        } else if (value instanceof ByteBuffer) {
          final ByteBuffer byteBuffer = (ByteBuffer) value;
          binaryBytes = toBytes(byteBuffer);
        } else if (value instanceof byte[]) {
          binaryBytes = (byte[]) value;
        } else {
          throw new IllegalStateException(String.format(
              "Unrecognized type for Avro BINARY field value, which has type %s, value %s",
              value.getClass().getName(),
              value
          ));
        }
        binaryColVec.setRef(vectorPos, binaryBytes, 0, binaryBytes.length);
        break;
      case DECIMAL:
        DecimalColumnVector decimalColVec = (DecimalColumnVector) colVector;
        HiveDecimal decimalValue;
        if (value instanceof BigDecimal) {
          final BigDecimal decimal = (BigDecimal) value;
          decimalValue = HiveDecimal.create(decimal);
        } else if (value instanceof ByteBuffer) {
          final ByteBuffer byteBuffer = (ByteBuffer) value;
          final byte[] decimalBytes = new byte[byteBuffer.remaining()];
          byteBuffer.get(decimalBytes);
          final BigInteger bigInt = new BigInteger(decimalBytes);
          final int scale = type.getScale();
          BigDecimal bigDecVal = new BigDecimal(bigInt, scale);

          decimalValue = HiveDecimal.create(bigDecVal);
          if (decimalValue == null && decimalBytes.length > 0) {
            throw new IllegalStateException(
                "Unexpected read null HiveDecimal from bytes (base-64 encoded): "
                    + Base64.getEncoder().encodeToString(decimalBytes)
            );
          }
        } else if (value instanceof GenericData.Fixed) {
          final BigDecimal decimal = new Conversions.DecimalConversion()
              .fromFixed((GenericData.Fixed) value, avroSchema, logicalType);
          decimalValue = HiveDecimal.create(decimal);
        } else {
          throw new IllegalStateException(String.format(
              "Unexpected type for decimal (%s), cannot convert from Avro value",
              value.getClass().getCanonicalName()
          ));
        }
        if (decimalValue == null) {
          decimalColVec.isNull[vectorPos] = true;
          decimalColVec.noNulls = false;
        } else {
          decimalColVec.set(vectorPos, decimalValue);
        }
        break;
      case LIST:
        List<?> list = (List<?>) value;
        ListColumnVector listColVec = (ListColumnVector) colVector;
        listColVec.offsets[vectorPos] = listColVec.childCount;
        listColVec.lengths[vectorPos] = list.size();

        TypeDescription listType = type.getChildren().get(0);
        for (Object listItem : list) {
          addToVector(listType, listColVec.child, avroSchema.getElementType(), listItem, listColVec.childCount++);
        }
        break;
      case MAP:
        Map<String, ?> mapValue = (Map<String, ?>) value;

        MapColumnVector mapColumnVector = (MapColumnVector) colVector;
        mapColumnVector.offsets[vectorPos] = mapColumnVector.childCount;
        mapColumnVector.lengths[vectorPos] = mapValue.size();

        // keys are always strings
        Schema keySchema = Schema.create(Schema.Type.STRING);
        for (Map.Entry<String, ?> entry : mapValue.entrySet()) {
          addToVector(
              type.getChildren().get(0),
              mapColumnVector.keys,
              keySchema,
              entry.getKey(),
              mapColumnVector.childCount
          );

          addToVector(
              type.getChildren().get(1),
              mapColumnVector.values,
              avroSchema.getValueType(),
              entry.getValue(),
              mapColumnVector.childCount
          );

          mapColumnVector.childCount++;
        }

        break;
      case STRUCT:
        StructColumnVector structColVec = (StructColumnVector) colVector;

        GenericData.Record record = (GenericData.Record) value;

        for (int i = 0; i < type.getFieldNames().size(); i++) {
          String fieldName = type.getFieldNames().get(i);
          Object fieldValue = record.get(fieldName);
          TypeDescription fieldType = type.getChildren().get(i);
          addToVector(fieldType, structColVec.fields[i], avroSchema.getFields().get(i).schema(), fieldValue, vectorPos);
        }

        break;
      case UNION:
        UnionColumnVector unionColVec = (UnionColumnVector) colVector;

        List<TypeDescription> childTypes = type.getChildren();
        boolean added = addUnionValue(unionColVec, childTypes, avroSchema, value, vectorPos);

        if (!added) {
          throw new IllegalStateException(String.format(
              "Failed to add value %s to union with type %s",
              value == null ? "null" : value.toString(),
              type
          ));
        }

        break;
      default:
        throw new IllegalArgumentException("Invalid TypeDescription " + type + ".");
    }
  }