in extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java [348:493]
/**
 * Converts one value of a primitive Parquet field into a plain Java object.
 *
 * If the field carries a logical (original) type annotation, the value is converted according to that
 * annotation; otherwise the conversion is driven by the raw primitive type name.
 *
 * This is a best-effort conversion: any exception raised while reading or converting the value is
 * swallowed and {@code null} is returned instead.
 *
 * @param g          group to read the value from
 * @param fieldIndex index of the field within the group's type
 * @param index      index of the value within the field, passed through to the {@code Group} accessors
 *
 * @return the converted value, or {@code null} if the value could not be converted
 */
private Object convertPrimitiveField(Group g, int fieldIndex, int index)
{
  PrimitiveType pt = (PrimitiveType) g.getType().getFields().get(fieldIndex);
  OriginalType ot = pt.getOriginalType();
  try {
    if (ot != null) {
      // convert logical types
      switch (ot) {
        case DATE:
          long ts = convertDateToMillis(g.getInteger(fieldIndex, index));
          return ts;
        case TIME_MICROS:
          return g.getLong(fieldIndex, index);
        case TIME_MILLIS:
          return g.getInteger(fieldIndex, index);
        case TIMESTAMP_MICROS:
          return TimeUnit.MILLISECONDS.convert(g.getLong(fieldIndex, index), TimeUnit.MICROSECONDS);
        case TIMESTAMP_MILLIS:
          return g.getLong(fieldIndex, index);
        case INTERVAL:
          /*
          INTERVAL is used for an interval of time. It must annotate a fixed_len_byte_array of length 12.
          This array stores three little-endian unsigned integers that represent durations at different
          granularities of time. The first stores a number in months, the second stores a number in days,
          and the third stores a number in milliseconds. This representation is independent of any particular
          timezone or date.
          Each component in this representation is independent of the others. For example, there is no
          requirement that a large number of days should be expressed as a mix of months and days because there is
          not a constant conversion from days to months.
          The sort order used for INTERVAL is undefined. When writing data, no min/max statistics should be
          saved for this type and if such non-compliant statistics are found during reading, they must be ignored.
          */
          Binary intervalVal = g.getBinary(fieldIndex, index);
          IntBuffer intBuf = intervalVal.toByteBuffer().order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
          // The components are unsigned 32-bit values; widen them so that values >= 2^31 are not
          // misread as negative numbers.
          long months = Integer.toUnsignedLong(intBuf.get(0));
          long days = Integer.toUnsignedLong(intBuf.get(1));
          long millis = Integer.toUnsignedLong(intBuf.get(2));
          if (months != 0) {
            // A month has no fixed length, so an interval containing months cannot be expressed as a
            // fixed-length Duration; such values are reported as unconvertible.
            return null;
          }
          // At most (2^32 - 1) days plus (2^32 - 1) millis, which fits comfortably in a long.
          return new Duration(TimeUnit.DAYS.toMillis(days) + millis);
        case INT_8:
        case INT_16:
        case INT_32:
          return g.getInteger(fieldIndex, index);
        case INT_64:
          return g.getLong(fieldIndex, index);
        case UINT_8:
        case UINT_16:
          return g.getInteger(fieldIndex, index);
        case UINT_32:
          // widen so the full unsigned 32-bit range is representable
          return Integer.toUnsignedLong(g.getInteger(fieldIndex, index));
        case UINT_64:
          // NOTE(review): returned as a signed long, so values above Long.MAX_VALUE appear negative
          return g.getLong(fieldIndex, index);
        case DECIMAL:
          /*
          DECIMAL can be used to annotate the following types:
          int32: for 1 <= precision <= 9
          int64: for 1 <= precision <= 18; precision < 10 will produce a warning
          fixed_len_byte_array: precision is limited by the array size. Length n can
          store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits
          binary: precision is not limited, but is required. The minimum number of bytes to store
          the unscaled value should be used.
          */
          int precision = pt.getDecimalMetadata().getPrecision();
          int scale = pt.getDecimalMetadata().getScale();
          switch (pt.getPrimitiveTypeName()) {
            case INT32:
              // The primitive returned from Group is an unscaledValue.
              // We need to do unscaledValue * 10^(-scale) to convert back to decimal
              return new BigDecimal(g.getInteger(fieldIndex, index)).movePointLeft(scale);
            case INT64:
              // The primitive returned from Group is an unscaledValue.
              // We need to do unscaledValue * 10^(-scale) to convert back to decimal
              return new BigDecimal(g.getLong(fieldIndex, index)).movePointLeft(scale);
            case FIXED_LEN_BYTE_ARRAY:
            case BINARY:
              Binary value = g.getBinary(fieldIndex, index);
              return convertBinaryToDecimal(value, precision, scale);
            default:
              throw new RE(
                  "Unknown 'DECIMAL' type supplied to primitive conversion: %s (this should never happen)",
                  pt.getPrimitiveTypeName()
              );
          }
        case UTF8:
        case ENUM:
        case JSON:
          return g.getString(fieldIndex, index);
        case LIST:
        case MAP:
        case MAP_KEY_VALUE:
        case BSON:
        default:
          throw new RE(
              "Non-primitive supplied to primitive conversion: %s (this should never happen)",
              ot.name()
          );
      }
    } else {
      // fallback to handling the raw primitive type if no logical type mapping
      switch (pt.getPrimitiveTypeName()) {
        case BOOLEAN:
          return g.getBoolean(fieldIndex, index);
        case INT32:
          return g.getInteger(fieldIndex, index);
        case INT64:
          return g.getLong(fieldIndex, index);
        case FLOAT:
          return g.getFloat(fieldIndex, index);
        case DOUBLE:
          return g.getDouble(fieldIndex, index);
        case INT96:
          Binary tsBin = g.getInt96(fieldIndex, index);
          return convertInt96BinaryToTimestamp(tsBin);
        case FIXED_LEN_BYTE_ARRAY:
        case BINARY:
          Binary bin = g.getBinary(fieldIndex, index);
          byte[] bytes = bin.getBytes();
          // binaryAsString selects between decoding the bytes as UTF-8 text and returning them raw
          if (binaryAsString) {
            return StringUtils.fromUtf8(bytes);
          } else {
            return bytes;
          }
        default:
          throw new RE("Unknown primitive conversion: %s", pt.getPrimitiveTypeName());
      }
    }
  }
  catch (Exception ignored) {
    // Deliberate best-effort: a value that cannot be read or converted (including the
    // "should never happen" cases thrown above) is surfaced to the caller as null.
    return null;
  }
}