in arrow/src/main/java/org/apache/iceberg/arrow/vectorized/VectorizedArrowReader.java [346:429]
/**
 * Creates and allocates the Arrow vector for a column, choosing the vector setup from the
 * primitive type name of {@code primitive}.
 *
 * <p>Every supported case assigns three fields of this reader: {@code vec} (the newly created
 * and allocated vector), {@code readType} and {@code typeWidth}. Variable-width binary and
 * boolean columns record {@code UNKNOWN_WIDTH} as their type width.
 *
 * @param primitive the primitive type whose type name selects the allocation strategy
 * @param arrowField the Arrow field used to create the vector; the INT32 and FLOAT cases ignore
 *     it and build their own field from the Iceberg field's name and optionality
 * @throws UnsupportedOperationException if the primitive type name is not handled below
 */
private void allocateVectorBasedOnTypeName(PrimitiveType primitive, Field arrowField) {
  switch (primitive.getPrimitiveTypeName()) {
    case BOOLEAN: {
      this.vec = arrowField.createVector(rootAlloc);
      final BitVector bitVector = (BitVector) vec;
      bitVector.allocateNew(batchSize);
      this.readType = ReadType.BOOLEAN;
      this.typeWidth = UNKNOWN_WIDTH;
      break;
    }
    case INT32: {
      // Built from the Iceberg field rather than arrowField: a signed 32-bit int field that
      // carries the Iceberg field's name and optionality.
      final String columnName = icebergField.name();
      final FieldType int32Type =
          new FieldType(
              icebergField.isOptional(), new ArrowType.Int(Integer.SIZE, true), null, null);
      final Field int32Field = new Field(columnName, int32Type, null);
      this.vec = int32Field.createVector(rootAlloc);
      final IntVector intVector = (IntVector) vec;
      intVector.allocateNew(batchSize);
      this.readType = ReadType.INT;
      this.typeWidth = (int) IntVector.TYPE_WIDTH;
      break;
    }
    case INT64: {
      this.vec = arrowField.createVector(rootAlloc);
      final BigIntVector bigIntVector = (BigIntVector) vec;
      bigIntVector.allocateNew(batchSize);
      this.readType = ReadType.LONG;
      this.typeWidth = (int) BigIntVector.TYPE_WIDTH;
      break;
    }
    case INT96: {
      // INT96 used to be the default timestamp encoding written by Impala and Spark, so it is
      // read as a timestamp for backwards compatibility. INT96 itself is deprecated and not
      // recommended (see https://issues.apache.org/jira/browse/PARQUET-323).
      final int int96Width = BigIntVector.TYPE_WIDTH;
      this.readType = ReadType.TIMESTAMP_INT96;
      this.vec = arrowField.createVector(rootAlloc);
      vec.setInitialCapacity(batchSize * int96Width);
      vec.allocateNew();
      this.typeWidth = int96Width;
      break;
    }
    case FLOAT: {
      // Built from the Iceberg field rather than arrowField: a single-precision float field
      // that carries the Iceberg field's name and optionality.
      final String columnName = icebergField.name();
      final FieldType float32Type =
          new FieldType(
              icebergField.isOptional(),
              new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE),
              null,
              null);
      final Field float32Field = new Field(columnName, float32Type, null);
      this.vec = float32Field.createVector(rootAlloc);
      final Float4Vector float4Vector = (Float4Vector) vec;
      float4Vector.allocateNew(batchSize);
      this.readType = ReadType.FLOAT;
      this.typeWidth = (int) Float4Vector.TYPE_WIDTH;
      break;
    }
    case DOUBLE: {
      this.vec = arrowField.createVector(rootAlloc);
      final Float8Vector float8Vector = (Float8Vector) vec;
      float8Vector.allocateNew(batchSize);
      this.readType = ReadType.DOUBLE;
      this.typeWidth = (int) Float8Vector.TYPE_WIDTH;
      break;
    }
    case BINARY: {
      this.vec = arrowField.createVector(rootAlloc);
      // TODO: Possibly use the uncompressed page size info to set the initial capacity
      vec.setInitialCapacity(batchSize * AVERAGE_VARIABLE_WIDTH_RECORD_SIZE);
      // NOTE(review): the result of allocateNewSafe() is not inspected here - confirm whether
      // an allocation failure should be surfaced at this point.
      vec.allocateNewSafe();
      this.readType = ReadType.VARBINARY;
      this.typeWidth = UNKNOWN_WIDTH;
      break;
    }
    case FIXED_LEN_BYTE_ARRAY: {
      // UUIDs are always 16 bytes wide; any other fixed type reports its own length.
      final boolean isUuid = icebergField.type() instanceof Types.UUIDType;
      final int fixedWidth = isUuid ? 16 : ((Types.FixedType) icebergField.type()).length();
      this.readType = isUuid ? ReadType.UUID : ReadType.FIXED_WIDTH_BINARY;
      this.vec = arrowField.createVector(rootAlloc);
      // NOTE(review): the capacity passed is batchSize * width; if setInitialCapacity expects a
      // value count this reserves more than one batch needs - confirm against the Arrow API.
      vec.setInitialCapacity(batchSize * fixedWidth);
      vec.allocateNew();
      this.typeWidth = fixedWidth;
      break;
    }
    default:
      throw new UnsupportedOperationException("Unsupported type: " + primitive);
  }
}