in arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java [261:344]
/**
 * Creates {@code vec} from the given Arrow field, allocates it, and records the {@code readType}
 * and {@code typeWidth} that correspond to the Parquet original (logical) type of the column.
 *
 * <p>For ENUM, JSON, UTF8 and BSON the initial capacity is set to {@code batchSize *
 * AVERAGE_VARIABLE_WIDTH_RECORD_SIZE} and the type width is {@code UNKNOWN_WIDTH}. For every
 * other handled type the vector is allocated with {@code batchSize}.
 *
 * @param primitive Parquet primitive type; its original type selects the branch, and for DECIMAL
 *     its primitive type name selects the backing vector
 * @param arrowField Arrow field used to create the vector with {@code rootAlloc}
 * @throws UnsupportedOperationException if the original type is not handled here, or if a
 *     DECIMAL is backed by a primitive type other than BINARY, FIXED_LEN_BYTE_ARRAY, INT64 or
 *     INT32
 */
private void allocateVectorBasedOnOriginalType(PrimitiveType primitive, Field arrowField) {
  switch (primitive.getOriginalType()) {
    case UTF8:
    case ENUM:
    case JSON:
    case BSON:
      vec = arrowField.createVector(rootAlloc);
      // TODO: Possibly use the uncompressed page size info to set the initial capacity
      vec.setInitialCapacity(batchSize * AVERAGE_VARIABLE_WIDTH_RECORD_SIZE);
      vec.allocateNewSafe();
      typeWidth = UNKNOWN_WIDTH;
      readType = ReadType.VARCHAR;
      break;

    case INT_32:
    case INT_16:
    case INT_8:
      vec = arrowField.createVector(rootAlloc);
      ((IntVector) vec).allocateNew(batchSize);
      typeWidth = (int) IntVector.TYPE_WIDTH;
      readType = ReadType.INT;
      break;

    case DATE:
      // Dates are read with the INT read type and the int width.
      vec = arrowField.createVector(rootAlloc);
      ((DateDayVector) vec).allocateNew(batchSize);
      typeWidth = (int) IntVector.TYPE_WIDTH;
      readType = ReadType.INT;
      break;

    case INT_64:
      vec = arrowField.createVector(rootAlloc);
      ((BigIntVector) vec).allocateNew(batchSize);
      typeWidth = (int) BigIntVector.TYPE_WIDTH;
      readType = ReadType.LONG;
      break;

    case TIMESTAMP_MILLIS:
      // Same vector class as INT_64, but with a dedicated read type.
      vec = arrowField.createVector(rootAlloc);
      ((BigIntVector) vec).allocateNew(batchSize);
      typeWidth = (int) BigIntVector.TYPE_WIDTH;
      readType = ReadType.TIMESTAMP_MILLIS;
      break;

    case TIMESTAMP_MICROS:
      vec = arrowField.createVector(rootAlloc);
      // The Iceberg timestamp type decides which timestamp vector class the cast targets.
      boolean adjustToUtc = ((Types.TimestampType) icebergField.type()).shouldAdjustToUTC();
      if (!adjustToUtc) {
        ((TimeStampMicroVector) vec).allocateNew(batchSize);
      } else {
        ((TimeStampMicroTZVector) vec).allocateNew(batchSize);
      }
      typeWidth = (int) BigIntVector.TYPE_WIDTH;
      readType = ReadType.LONG;
      break;

    case TIME_MICROS:
      vec = arrowField.createVector(rootAlloc);
      ((TimeMicroVector) vec).allocateNew(batchSize);
      typeWidth = (int) TimeMicroVector.TYPE_WIDTH;
      readType = ReadType.LONG;
      break;

    case DECIMAL:
      // The vector is created first; allocation depends on the backing primitive type.
      vec = arrowField.createVector(rootAlloc);
      allocateDecimalVector(primitive);
      break;

    default:
      throw new UnsupportedOperationException(
          "Unsupported logical type: " + primitive.getOriginalType());
  }
}

/**
 * Allocates the already-created {@code vec} for a DECIMAL column and records the {@code
 * readType} and {@code typeWidth} matching the primitive type that backs the decimal.
 *
 * @param primitive Parquet primitive type of the decimal column
 * @throws UnsupportedOperationException if the backing primitive type is not BINARY,
 *     FIXED_LEN_BYTE_ARRAY, INT64 or INT32
 */
private void allocateDecimalVector(PrimitiveType primitive) {
  switch (primitive.getPrimitiveTypeName()) {
    case INT32:
      ((IntVector) vec).allocateNew(batchSize);
      typeWidth = (int) IntVector.TYPE_WIDTH;
      readType = ReadType.INT_BACKED_DECIMAL;
      break;

    case INT64:
      ((BigIntVector) vec).allocateNew(batchSize);
      typeWidth = (int) BigIntVector.TYPE_WIDTH;
      readType = ReadType.LONG_BACKED_DECIMAL;
      break;

    case FIXED_LEN_BYTE_ARRAY:
    case BINARY:
      ((FixedSizeBinaryVector) vec).allocateNew(batchSize);
      readType = ReadType.FIXED_LENGTH_DECIMAL;
      // Width comes from the Parquet type itself rather than from a vector constant.
      typeWidth = primitive.getTypeLength();
      break;

    default:
      throw new UnsupportedOperationException(
          "Unsupported base type for decimal: " + primitive.getPrimitiveTypeName());
  }
}