in api/src/main/java/org/apache/iceberg/types/TypeUtil.java [512:568]
/**
 * Produces a rough per-value size estimate, in bytes, for the given type.
 *
 * <p>Fixed-width primitives map to their width, variable-length and object-backed types map to
 * constant guesses, and nested types are estimated recursively from their children plus {@code
 * HEADER_SIZE} per container.
 *
 * @param type the type to estimate a value size for
 * @return the estimated size in bytes; 16 when the type id is not handled explicitly
 */
private static int estimateSize(Type type) {
  switch (type.typeId()) {
    // ---- nested types: container header plus the recursive estimate of the children ----
    case STRUCT:
      Types.StructType structType = (Types.StructType) type;
      int fieldsTotal = structType.fields().stream().mapToInt(TypeUtil::estimateSize).sum();
      return HEADER_SIZE + fieldsTotal;
    case LIST:
      Types.ListType listType = (Types.ListType) type;
      int elementSize = estimateSize(listType.elementType());
      // the element estimate is counted five times
      return HEADER_SIZE + 5 * elementSize;
    case MAP:
      Types.MapType mapType = (Types.MapType) type;
      int keySize = estimateSize(mapType.keyType());
      int valueSize = estimateSize(mapType.valueType());
      // each entry carries its own header in addition to its key and value
      int perEntry = HEADER_SIZE + keySize + valueSize;
      // the entry estimate is counted five times
      return HEADER_SIZE + 5 * perEntry;

    // ---- fixed-width primitives ----
    case BOOLEAN:
      // boolean width depends on the virtual machine; 1 byte is the commonly assumed figure
      return 1;
    case INTEGER:
    case FLOAT:
    case DATE:
      // 4-byte values: ints, floats, and dates (dates are stored as ints)
      return 4;
    case LONG:
    case DOUBLE:
    case TIME:
    case TIMESTAMP:
    case TIMESTAMP_NANO:
      // 8-byte values: longs, doubles, and times/timestamps (stored as longs)
      return 8;
    case FIXED:
      // a fixed type contributes exactly its declared length
      Types.FixedType fixedType = (Types.FixedType) type;
      return fixedType.length();

    // ---- object-backed and variable-length types: constant guesses ----
    case STRING:
      // header (12) + fields (6) + array overhead (16) + 10 chars at 2 bytes each (20) = 54
      return 54;
    case UUID:
      // header (12) + two longs (16) = 28
      return 28;
    case DECIMAL:
      // header (12) + BigInteger (12 + 12 + 4) + scale (4) = 44
      return 44;
    case BINARY:
    case VARIANT:
    case GEOMETRY:
    case GEOGRAPHY:
      // for geometry/geography, 80 bytes approximates a polygon or linestring with 4 to 5
      // coordinates, which is a reasonable estimate when no further details are available;
      // binary and variant values use the same 80-byte figure
      return 80;
    case UNKNOWN:
      // unknown is treated like null and contributes nothing
      return 0;

    default:
      // fallback for any type id without an explicit estimate
      return 16;
  }
}