private void fillGroup(int index, OneField field, Group group, Type type)

in server/pxf-hdfs/src/main/java/org/greenplum/pxf/plugins/hdfs/ParquetResolver.java [120:207]


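    /**
     * Fills one column of a Parquet {@link Group} with the value of a
     * Greenplum field, converting the value according to the column's
     * Parquet primitive type and logical type annotation. Null values are
     * skipped, leaving the column unset (null) in the resulting row;
     * decimals that overflow the supported precision are also stored as
     * null, matching Hive's behavior.
     *
     * @param index zero-based index of the column in the group
     * @param field the Greenplum field whose value is written
     * @param group the Parquet group (row) being filled
     * @param type  the Parquet schema type of the column
     * @throws IOException if the Parquet primitive type is not supported
     */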
    private void fillGroup(int index, OneField field, Group group, Type type) throws IOException {
        if (field.val == null) {
            return;
        }
        switch (type.asPrimitiveType().getPrimitiveTypeName()) {
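            // Dispatch on the Parquet physical type; logical type annotations
            // (string, date, int width, decimal) disambiguate within a case.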
            case BINARY:
                if (type.getLogicalTypeAnnotation() instanceof StringLogicalTypeAnnotation) {
                    group.add(index, (String) field.val);
                } else {
                    group.add(index, Binary.fromReusedByteArray((byte[]) field.val));
                }
                break;
            case INT32:
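                // INT32 backs three Greenplum types: DATE (date annotation, stored
                // as days since the Unix epoch), SMALLINT (16-bit integer
                // annotation), and plain INTEGER.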
                if (type.getLogicalTypeAnnotation() instanceof DateLogicalTypeAnnotation) {
                    String dateString = (String) field.val;
                    group.add(index, ParquetTypeConverter.getDaysFromEpochFromDateString(dateString));
                } else if (type.getLogicalTypeAnnotation() instanceof IntLogicalTypeAnnotation &&
                        ((IntLogicalTypeAnnotation) type.getLogicalTypeAnnotation()).getBitWidth() == 16) {
                    group.add(index, (Short) field.val);
                } else {
                    group.add(index, (Integer) field.val);
                }
                break;
            case INT64:
                group.add(index, (Long) field.val);
                break;
            case DOUBLE:
                group.add(index, (Double) field.val);
                break;
            case FLOAT:
                group.add(index, (Float) field.val);
                break;
            case FIXED_LEN_BYTE_ARRAY:
                // From org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter.DecimalDataWriter#decimalToBinary
                String value = (String) field.val;
                DecimalLogicalTypeAnnotation typeAnnotation = (DecimalLogicalTypeAnnotation) type.getLogicalTypeAnnotation();
                int precision = Math.min(HiveDecimal.MAX_PRECISION, typeAnnotation.getPrecision());
                int scale = Math.min(HiveDecimal.MAX_SCALE, typeAnnotation.getScale());
                HiveDecimal hiveDecimal = HiveDecimal.enforcePrecisionScale(
                        HiveDecimal.create(value),
                        precision,
                        scale);

                if (hiveDecimal == null) {
                    // When precision is higher than HiveDecimal.MAX_PRECISION
                    // and enforcePrecisionScale returns null, it means we
                    // cannot store the value in Parquet because we have
                    // exceeded the precision. To make the behavior consistent
                    // with Hive's behavior when storing on a Parquet-backed
                    // table, we store the value as null.
                    return;
                }

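                // Two's-complement, big-endian bytes of the unscaled value at the
                // target scale; this is the representation Parquet expects for
                // FIXED_LEN_BYTE_ARRAY decimals.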
                byte[] decimalBytes = hiveDecimal.bigIntegerBytesScaled(scale);

                // Minimum number of bytes needed to store a decimal of this precision.
                int precToBytes = ParquetFileAccessor.PRECISION_TO_BYTE_COUNT[precision - 1];
                if (precToBytes == decimalBytes.length) {
                    // No padding needed.
                    group.add(index, Binary.fromReusedByteArray(decimalBytes));
                } else {
                    byte[] tgt = new byte[precToBytes];
                    if (hiveDecimal.signum() == -1) {
                        // For negative numbers, pre-fill with 0xFF so the leading pad bytes sign-extend the value
                        for (int i = 0; i < precToBytes; i++) {
                            tgt[i] |= 0xFF;
                        }
                    }
                    System.arraycopy(decimalBytes, 0, tgt, precToBytes - decimalBytes.length, decimalBytes.length); // Padding leading zeroes/ones.
                    group.add(index, Binary.fromReusedByteArray(tgt));
                }
                // end -- org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter.DecimalDataWriter#decimalToBinary
                break;
            case INT96:  // legacy Parquet timestamp; the field holds a SQL-standard timestamp literal, with or without a time zone: https://www.postgresql.org/docs/9.4/datatype-datetime.html
                String timestamp = (String) field.val;
                if (TIMESTAMP_PATTERN.matcher(timestamp).find()) {
                    // Note: converting a "timestamp with time zone" value loses the
                    // time zone but preserves the correct instant, since Parquet has
                    // no timestamp-with-time-zone type.
                    group.add(index, ParquetTypeConverter.getBinaryFromTimestampWithTimeZone(timestamp));
                } else {
                    group.add(index, ParquetTypeConverter.getBinaryFromTimestamp(timestamp));
                }
                break;
            case BOOLEAN:
                group.add(index, (Boolean) field.val);
                break;
            default:
                throw new IOException("Not supported type " + type.asPrimitiveType().getPrimitiveTypeName());
        }
    }
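
Below is a minimal, self-contained sketch of the same sign-extension padding
used in the FIXED_LEN_BYTE_ARRAY case, using plain java.math rather than the
Hive and Parquet classes above: the two's-complement bytes of the unscaled
decimal value are right-aligned into a fixed-width buffer, with pad bytes of
0x00 for non-negative values and 0xFF for negative ones. The class and method
names (DecimalPadSketch, pad) are illustrative only.

    import java.math.BigDecimal;
    import java.util.Arrays;

    public class DecimalPadSketch {
        static byte[] pad(BigDecimal value, int scale, int width) {
            // Minimal two's-complement, big-endian bytes of the unscaled value
            // at the requested scale (throws if rounding would be required).
            byte[] src = value.setScale(scale).unscaledValue().toByteArray();
            byte[] tgt = new byte[width];
            if (value.signum() == -1) {
                Arrays.fill(tgt, (byte) 0xFF); // sign extension for negatives
            }
            // Right-align the value bytes; leading bytes keep the 0x00/0xFF padding.
            System.arraycopy(src, 0, tgt, width - src.length, src.length);
            return tgt;
        }

        public static void main(String[] args) {
            // -1.23 at scale 2 has unscaled value -123 (0x85);
            // padded to 4 bytes: FF FF FF 85, printed as signed bytes.
            System.out.println(Arrays.toString(pad(new BigDecimal("-1.23"), 2, 4)));
        }
    }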