private Object convertPrimitiveField()

in extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupConverter.java [348:493]


  /**
   * Convert one value of a primitive parquet field into a plain java object.
   *
   * If the field carries a logical ({@link OriginalType}) annotation the value is converted according to that
   * annotation, otherwise the raw primitive type decides the conversion.
   *
   * Conversion is best-effort: any exception raised while reading or converting the value is swallowed and
   * {@code null} is returned instead.
   *
   * @param g          group that holds the field
   * @param fieldIndex position of the field within the group's type
   * @param index      position of the value within the (possibly repeated) field
   *
   * @return the converted value, or {@code null} if the value could not be read or converted
   */
  private Object convertPrimitiveField(Group g, int fieldIndex, int index)
  {
    PrimitiveType pt = (PrimitiveType) g.getType().getFields().get(fieldIndex);
    OriginalType ot = pt.getOriginalType();

    try {
      if (ot != null) {
        // convert logical types
        switch (ot) {
          case DATE:
            // held in a long local so the boxed result is always a Long
            long dateMillis = convertDateToMillis(g.getInteger(fieldIndex, index));
            return dateMillis;
          case TIME_MICROS:
            return g.getLong(fieldIndex, index);
          case TIME_MILLIS:
            return g.getInteger(fieldIndex, index);
          case TIMESTAMP_MICROS:
            return TimeUnit.MILLISECONDS.convert(g.getLong(fieldIndex, index), TimeUnit.MICROSECONDS);
          case TIMESTAMP_MILLIS:
            return g.getLong(fieldIndex, index);
          case INTERVAL:
          /*
          INTERVAL is used for an interval of time. It must annotate a fixed_len_byte_array of length 12.
          This array stores three little-endian unsigned integers that represent durations at different
          granularities of time. The first stores a number in months, the second stores a number in days,
          and the third stores a number in milliseconds. This representation is independent of any particular
          timezone or date.

          Each component in this representation is independent of the others. For example, there is no
          requirement that a large number of days should be expressed as a mix of months and days because there is
          not a constant conversion from days to months.

          The sort order used for INTERVAL is undefined. When writing data, no min/max statistics should be
          saved for this type and if such non-compliant statistics are found during reading, they must be ignored.
           */
            Binary intervalVal = g.getBinary(fieldIndex, index);
            IntBuffer intBuf = intervalVal.toByteBuffer().order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
            // The components are unsigned 32 bit values; widen them so that values >= 2^31 are not
            // misread as negative numbers (which would drop months/days or yield a negative duration).
            long months = Integer.toUnsignedLong(intBuf.get(0));
            long days = Integer.toUnsignedLong(intBuf.get(1));
            long millis = Integer.toUnsignedLong(intBuf.get(2));
            StringBuilder periodBuilder = new StringBuilder("P");
            if (months > 0) {
              periodBuilder.append(months).append("M");
            }
            if (days > 0) {
              periodBuilder.append(days).append("D");
            }
            if (periodBuilder.length() > 1) {
              // NOTE: Joda's Period#toStandardDuration rejects periods that contain months (they have no fixed
              // length), and Period fields are ints. Intervals with a month component, or with a component
              // that does not fit an int, therefore end up in the catch below and are returned as null.
              Period p = Period.parse(periodBuilder.toString());
              return p.toStandardDuration().plus(millis);
            } else {
              return new Duration(millis);
            }
          case INT_8:
          case INT_16:
          case INT_32:
            return g.getInteger(fieldIndex, index);
          case INT_64:
            return g.getLong(fieldIndex, index);
          case UINT_8:
          case UINT_16:
            return g.getInteger(fieldIndex, index);
          case UINT_32:
            return Integer.toUnsignedLong(g.getInteger(fieldIndex, index));
          case UINT_64:
            // NOTE(review): returned as a signed long, so values >= 2^63 appear negative to callers.
            return g.getLong(fieldIndex, index);
          case DECIMAL:
          /*
            DECIMAL can be used to annotate the following types:
              int32: for 1 <= precision <= 9
              int64: for 1 <= precision <= 18; precision < 10 will produce a warning
              fixed_len_byte_array: precision is limited by the array size. Length n can
                store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits
              binary: precision is not limited, but is required. The minimum number of bytes to store
                the unscaled value should be used.
           */
            int precision = pt.getDecimalMetadata().getPrecision();
            int scale = pt.getDecimalMetadata().getScale();
            switch (pt.getPrimitiveTypeName()) {
              case INT32:
                // The primitive returned from Group is an unscaledValue.
                // We need to do unscaledValue * 10^(-scale) to convert back to decimal
                return new BigDecimal(g.getInteger(fieldIndex, index)).movePointLeft(scale);
              case INT64:
                // The primitive returned from Group is an unscaledValue.
                // We need to do unscaledValue * 10^(-scale) to convert back to decimal
                return new BigDecimal(g.getLong(fieldIndex, index)).movePointLeft(scale);
              case FIXED_LEN_BYTE_ARRAY:
              case BINARY:
                Binary value = g.getBinary(fieldIndex, index);
                return convertBinaryToDecimal(value, precision, scale);
              default:
                throw new RE(
                    "Unknown 'DECIMAL' type supplied to primitive conversion: %s (this should never happen)",
                    pt.getPrimitiveTypeName()
                );
            }
          case UTF8:
          case ENUM:
          case JSON:
            return g.getString(fieldIndex, index);
          case LIST:
          case MAP:
          case MAP_KEY_VALUE:
          case BSON:
          default:
            throw new RE(
                "Non-primitive supplied to primitive conversion: %s (this should never happen)",
                ot.name()
            );
        }
      } else {
        // fallback to handling the raw primitive type if no logical type mapping
        switch (pt.getPrimitiveTypeName()) {
          case BOOLEAN:
            return g.getBoolean(fieldIndex, index);
          case INT32:
            return g.getInteger(fieldIndex, index);
          case INT64:
            return g.getLong(fieldIndex, index);
          case FLOAT:
            return g.getFloat(fieldIndex, index);
          case DOUBLE:
            return g.getDouble(fieldIndex, index);
          case INT96:
            Binary tsBin = g.getInt96(fieldIndex, index);
            return convertInt96BinaryToTimestamp(tsBin);
          case FIXED_LEN_BYTE_ARRAY:
          case BINARY:
            Binary bin = g.getBinary(fieldIndex, index);
            byte[] bytes = bin.getBytes();
            // binaryAsString selects between decoding the bytes as UTF-8 text and returning them raw
            if (binaryAsString) {
              return StringUtils.fromUtf8(bytes);
            } else {
              return bytes;
            }
          default:
            throw new RE("Unknown primitive conversion: %s", pt.getPrimitiveTypeName());
        }
      }
    }
    catch (Exception ex) {
      // Deliberate best-effort: a value that cannot be read or converted is reported as null rather than
      // failing the caller. This presumably also covers the RE instances thrown above.
      return null;
    }
  }