in ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java [212:570]
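/*
 * Extracts the value of one column for one row of a VectorizedRowBatch, converting the
 * vectorized representation into either a Hadoop Writable (OutputType.WRITABLES) or a
 * plain Java / Hive object, and recursing into child vectors for complex types.
 */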
public Object extractRowColumn(
ColumnVector colVector, TypeInfo typeInfo, ObjectInspector objectInspector, int batchIndex,
OutputType outputType) {
if (colVector == null) {
// The planner will not include unneeded columns for reading, but other parts of
// execution may still ask for them.
return null;
}
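// A repeating vector stores its single shared value at index 0, so read from there
// instead of batchIndex when isRepeating is set.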
final int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
return null;
}
final Category category = typeInfo.getCategory();
switch (category) {
case PRIMITIVE:
{
final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
final Writable primitiveWritable = outputType == OutputType.WRITABLES ?
VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory) :
null;
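// In WRITABLES mode the value is returned in a Writable of the matching primitive type
// obtained from VectorizedBatchUtil; otherwise a plain Java / Hive object is returned.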
switch (primitiveCategory) {
case VOID:
return null;
case BOOLEAN:
boolean boolValue = ((LongColumnVector) colVector).vector[adjustedIndex] != 0;
if (outputType == OutputType.WRITABLES) {
((BooleanWritable) primitiveWritable).set(boolValue);
return primitiveWritable;
} else {
return boolValue;
}
case BYTE:
byte byteValue = (byte) ((LongColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((ByteWritable) primitiveWritable).set(byteValue);
return primitiveWritable;
} else {
return byteValue;
}
case SHORT:
short shortValue = (short) ((LongColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((ShortWritable) primitiveWritable).set(shortValue);
return primitiveWritable;
} else {
return shortValue;
}
case INT:
int intValue = (int) ((LongColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((IntWritable) primitiveWritable).set(intValue);
return primitiveWritable;
} else {
return intValue;
}
case LONG:
long longValue = ((LongColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((LongWritable) primitiveWritable).set(longValue);
return primitiveWritable;
} else {
return longValue;
}
case TIMESTAMP:
// Convert from the java.sql.Timestamp used by vectorization to the serializable org.apache.hadoop.hive.common.type.Timestamp.
java.sql.Timestamp ts =
((TimestampColumnVector) colVector).asScratchTimestamp(adjustedIndex);
Timestamp serializableTS = Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos());
if (outputType == OutputType.WRITABLES) {
((TimestampWritableV2) primitiveWritable).set(serializableTS);
return primitiveWritable;
} else {
// return Hive Timestamp object
return serializableTS;
}
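// DATE is stored in the LongColumnVector as days since the epoch.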
case DATE:
Date dateValue = Date.ofEpochDay((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
if (outputType == OutputType.WRITABLES) {
((DateWritableV2) primitiveWritable).set(dateValue);
return primitiveWritable;
} else {
// return Hive Date object
return dateValue;
}
case FLOAT:
float floatValue = (float) ((DoubleColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((FloatWritable) primitiveWritable).set(floatValue);
return primitiveWritable;
} else {
return floatValue;
}
case DOUBLE:
double doubleValue = ((DoubleColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((DoubleWritable) primitiveWritable).set(doubleValue);
return primitiveWritable;
} else {
return doubleValue;
}
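// A BytesColumnVector row is a (byte[] reference, start, length) slice that may point into
// a shared buffer, so the BINARY/STRING/VARCHAR/CHAR cases below copy or re-wrap that range.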
case BINARY:
{
final BytesColumnVector bytesColVector =
((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
if (bytes == null || length == 0) {
if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
bytesWritable.set(EMPTY_BYTES, 0, 0);
} else {
bytesWritable.set(bytes, start, length);
}
return bytesWritable;
} else {
if (bytes == null) {
// Mirror the writable branch: a null byte array with a non-zero length is a read error.
if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
return new byte[0];
}
byte[] ret = new byte[length];
System.arraycopy(bytes, start, ret, 0, length);
return ret;
}
}
case STRING:
{
final BytesColumnVector bytesColVector =
((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
String result = null;
if (bytes == null || length == 0) {
if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
result = EMPTY_STRING;
if (outputType == OutputType.WRITABLES) {
((Text) primitiveWritable).set(EMPTY_BYTES, 0, 0);
}
} else {
// Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
if (outputType == OutputType.WRITABLES) {
((Text) primitiveWritable).set(bytes, start, length);
} else {
try {
result = Text.decode(bytes, start, length);
} catch (CharacterCodingException e) {
throw new RuntimeException("Could not decode to String object.", e);
}
}
}
if (outputType == OutputType.WRITABLES) {
return primitiveWritable;
} else {
return result;
}
}
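// VARCHAR enforces its declared maximum length with StringExpr.truncate, while CHAR
// additionally right-trims trailing spaces via StringExpr.rightTrimAndTruncate.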
case VARCHAR:
{
final BytesColumnVector bytesColVector =
((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
// TODO: maybe not create writable for POJO case
final HiveVarcharWritable hiveVarcharWritable =
(HiveVarcharWritable) VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
if (bytes == null || length == 0) {
if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
hiveVarcharWritable.set(EMPTY_STRING, -1);
} else {
final int adjustedLength =
StringExpr.truncate(
bytes, start, length, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
if (adjustedLength == 0) {
hiveVarcharWritable.set(EMPTY_STRING, -1);
} else {
hiveVarcharWritable.set(
new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
}
}
if (outputType == OutputType.WRITABLES) {
return hiveVarcharWritable;
} else {
return hiveVarcharWritable.getHiveVarchar();
}
}
case CHAR:
{
final BytesColumnVector bytesColVector =
((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
// TODO: maybe not create writable for POJO case
final HiveCharWritable hiveCharWritable =
(HiveCharWritable) VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
final int maxLength = ((CharTypeInfo) primitiveTypeInfo).getLength();
if (bytes == null || length == 0) {
if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
hiveCharWritable.set(EMPTY_STRING, maxLength);
} else {
final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, maxLength);
if (adjustedLength == 0) {
hiveCharWritable.set(EMPTY_STRING, maxLength);
} else {
hiveCharWritable.set(
new String(bytes, start, adjustedLength, Charsets.UTF_8), maxLength);
}
}
if (outputType == OutputType.WRITABLES) {
return hiveCharWritable;
} else {
return hiveCharWritable.getHiveChar();
}
}
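// Decimal values arrive either as scaled longs in a Decimal64ColumnVector or as full
// writable values in a DecimalColumnVector; deserialize64 rebuilds the decimal from the
// long using the vector's scale.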
case DECIMAL:
// The decimal conversion code is deep within HiveDecimalWritable, so creating one probably can't be avoided.
HiveDecimalWritable decimalWritable =
(HiveDecimalWritable) VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
if (colVector instanceof Decimal64ColumnVector) {
Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) colVector;
decimalWritable.deserialize64(dec64ColVector.vector[adjustedIndex], dec64ColVector.scale);
} else {
// The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields.
decimalWritable.set(((DecimalColumnVector) colVector).vector[adjustedIndex]);
}
if (outputType == OutputType.WRITABLES) {
return decimalWritable;
} else {
return decimalWritable.getHiveDecimal();
}
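// INTERVAL_YEAR_MONTH is stored in the LongColumnVector as a total number of months.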
case INTERVAL_YEAR_MONTH:
HiveIntervalYearMonth hiveIntervalYearMonthValue =
new HiveIntervalYearMonth((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
if (outputType == OutputType.WRITABLES) {
((HiveIntervalYearMonthWritable) primitiveWritable).set(hiveIntervalYearMonthValue);
return primitiveWritable;
} else {
return hiveIntervalYearMonthValue;
}
case INTERVAL_DAY_TIME:
HiveIntervalDayTime hiveIntervalDayTimeValue =
((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedIndex);
if (outputType == OutputType.WRITABLES) {
((HiveIntervalDayTimeWritable) primitiveWritable).set(hiveIntervalDayTimeValue);
return primitiveWritable;
} else {
return hiveIntervalDayTimeValue;
}
default:
throw new RuntimeException("Primitive category " + primitiveCategory.name() +
" not supported");
}
}
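// Complex types are laid out as offset/length slices over child vectors and are extracted
// element by element through the four-argument extractRowColumn overload. For example, with
// offsets[row] = 5 and lengths[row] = 3, the list below collects child[5], child[6] and child[7].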
case LIST:
{
final ListColumnVector listColumnVector = (ListColumnVector) colVector;
final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
final ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector;
final int offset = (int) listColumnVector.offsets[adjustedIndex];
final int size = (int) listColumnVector.lengths[adjustedIndex];
final List<Object> list = new ArrayList<Object>();
for (int i = 0; i < size; i++) {
list.add(
extractRowColumn(
listColumnVector.child,
listTypeInfo.getListElementTypeInfo(),
listObjectInspector.getListElementObjectInspector(),
offset + i));
}
return list;
}
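// A MapColumnVector uses the same offsets/lengths layout over two parallel child vectors;
// entry i of the row pairs keys[offset + i] with values[offset + i], and the LinkedHashMap
// preserves that stored order.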
case MAP:
{
final MapColumnVector mapColumnVector = (MapColumnVector) colVector;
final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
final MapObjectInspector mapObjectInspector = (MapObjectInspector) objectInspector;
final int offset = (int) mapColumnVector.offsets[adjustedIndex];
final int size = (int) mapColumnVector.lengths[adjustedIndex];
final Map<Object, Object> map = new LinkedHashMap<Object, Object>();
for (int i = 0; i < size; i++) {
final Object key = extractRowColumn(
mapColumnVector.keys,
mapTypeInfo.getMapKeyTypeInfo(),
mapObjectInspector.getMapKeyObjectInspector(),
offset + i);
final Object value = extractRowColumn(
mapColumnVector.values,
mapTypeInfo.getMapValueTypeInfo(),
mapObjectInspector.getMapValueObjectInspector(),
offset + i);
map.put(key, value);
}
return map;
}
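// A StructColumnVector holds one child vector per field, all indexed by the same row,
// so each field is extracted at adjustedIndex and set on a fresh struct object.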
case STRUCT:
{
final StructColumnVector structColumnVector = (StructColumnVector) colVector;
final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
final StandardStructObjectInspector structInspector =
(StandardStructObjectInspector) objectInspector;
final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
final int size = fieldTypeInfos.size();
final List<? extends StructField> structFields =
structInspector.getAllStructFieldRefs();
final Object struct = structInspector.create();
for (int i = 0; i < size; i++) {
final StructField structField = structFields.get(i);
final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i);
final Object value = extractRowColumn(
structColumnVector.fields[i],
fieldTypeInfo,
structField.getFieldObjectInspector(),
adjustedIndex);
structInspector.setStructFieldData(struct, structField, value);
}
return struct;
}
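// A UnionColumnVector stores a per-row tag that selects which of its field vectors holds
// the value; only that child is extracted and wrapped in a StandardUnion.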
case UNION:
{
final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
final UnionObjectInspector unionInspector = (UnionObjectInspector) objectInspector;
final List<ObjectInspector> unionInspectors = unionInspector.getObjectInspectors();
final UnionColumnVector unionColumnVector = (UnionColumnVector) colVector;
final byte tag = (byte) unionColumnVector.tags[adjustedIndex];
final Object object = extractRowColumn(
unionColumnVector.fields[tag],
objectTypeInfos.get(tag),
unionInspectors.get(tag),
adjustedIndex);
final StandardUnion standardUnion = new StandardUnion();
standardUnion.setTag(tag);
standardUnion.setObject(object);
return standardUnion;
}
default:
throw new RuntimeException("Category " + category.name() + " not supported");
}
}