in ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java [212:570]
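/*
 * Extracts the value of one column for one row of a VectorizedRowBatch, converting the
 * vectorized representation into either a Hadoop Writable (OutputType.WRITABLES) or a
 * plain Java / Hive object, and recursing into child vectors for complex types.
 */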
public Object extractRowColumn(
ColumnVector colVector, TypeInfo typeInfo, ObjectInspector objectInspector, int batchIndex,
OutputType outputType) {
if (colVector == null) {
// The planner will not include unneeded columns for reading, but other parts of
// execution may still ask for them.
return null;
}
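// A repeating vector stores its single shared value at index 0, so read from there
// instead of batchIndex when isRepeating is set.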
final int adjustedIndex = (colVector.isRepeating ? 0 : batchIndex);
if (!colVector.noNulls && colVector.isNull[adjustedIndex]) {
return null;
}
final Category category = typeInfo.getCategory();
switch (category) {
case PRIMITIVE:
{
final PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
final PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
final Writable primitiveWritable = outputType == OutputType.WRITABLES ?
VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory) :
null;
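// In WRITABLES mode the value is returned in a Writable of the matching primitive type
// obtained from VectorizedBatchUtil; otherwise a plain Java / Hive object is returned.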
switch (primitiveCategory) {
case VOID:
return null;
case BOOLEAN:
boolean boolValue = ((LongColumnVector) colVector).vector[adjustedIndex] != 0;
if (outputType == OutputType.WRITABLES) {
((BooleanWritable) primitiveWritable).set(boolValue);
return primitiveWritable;
} else {
return boolValue;
}
case BYTE:
byte byteValue = (byte) ((LongColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((ByteWritable) primitiveWritable).set(byteValue);
return primitiveWritable;
} else {
return byteValue;
}
case SHORT:
short shortValue = (short) ((LongColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((ShortWritable) primitiveWritable).set(shortValue);
return primitiveWritable;
} else {
return shortValue;
}
case INT:
int intValue = (int) ((LongColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((IntWritable) primitiveWritable).set(intValue);
return primitiveWritable;
} else {
return intValue;
}
case LONG:
long longValue = ((LongColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((LongWritable) primitiveWritable).set(longValue);
return primitiveWritable;
} else {
return longValue;
}
case TIMESTAMP:
// Convert from the java.sql.Timestamp used by vectorization to the serializable org.apache.hadoop.hive.common.type.Timestamp.
java.sql.Timestamp ts =
((TimestampColumnVector) colVector).asScratchTimestamp(adjustedIndex);
Timestamp serializableTS = Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos());
if (outputType == OutputType.WRITABLES) {
((TimestampWritableV2) primitiveWritable).set(serializableTS);
return primitiveWritable;
} else {
// return Hive Timestamp object
return serializableTS;
}
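// DATE is stored in the LongColumnVector as days since the epoch.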
case DATE:
Date dateValue = Date.ofEpochDay((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
if (outputType == OutputType.WRITABLES) {
((DateWritableV2) primitiveWritable).set(dateValue);
return primitiveWritable;
} else {
// return Hive Date object
return dateValue;
}
case FLOAT:
float floatValue = (float) ((DoubleColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((FloatWritable) primitiveWritable).set(floatValue);
return primitiveWritable;
} else {
return floatValue;
}
case DOUBLE:
double doubleValue = ((DoubleColumnVector) colVector).vector[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
((DoubleWritable) primitiveWritable).set(doubleValue);
return primitiveWritable;
} else {
return doubleValue;
}
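// A BytesColumnVector row is a (byte[] reference, start, length) slice that may point into
// a shared buffer, so the BINARY/STRING/VARCHAR/CHAR cases below copy or re-wrap that range.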
case BINARY:
{
final BytesColumnVector bytesColVector =
((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
if (outputType == OutputType.WRITABLES) {
BytesWritable bytesWritable = (BytesWritable) primitiveWritable;
if (bytes == null || length == 0) {
if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
bytesWritable.set(EMPTY_BYTES, 0, 0);
} else {
bytesWritable.set(bytes, start, length);
}
return bytesWritable;
} else {
if (bytes == null) {
// Mirror the writable branch: a null byte array with a non-zero length is a read error.
if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
return new byte[0];
}
byte[] ret = new byte[length];
System.arraycopy(bytes, start, ret, 0, length);
return ret;
}
}
case STRING:
{
final BytesColumnVector bytesColVector =
((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
String result = null;
if (bytes == null || length == 0) {
if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
result = EMPTY_STRING;
if (outputType == OutputType.WRITABLES) {
((Text) primitiveWritable).set(EMPTY_BYTES, 0, 0);
}
} else {
// Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
if (outputType == OutputType.WRITABLES) {
((Text) primitiveWritable).set(bytes, start, length);
} else {
try {
result = Text.decode(bytes, start, length);
} catch (CharacterCodingException e) {
throw new RuntimeException("Could not decode to String object.", e);
}
}
}
if (outputType == OutputType.WRITABLES) {
return primitiveWritable;
} else {
return result;
}
}
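// VARCHAR enforces its declared maximum length with StringExpr.truncate, while CHAR
// additionally right-trims trailing spaces via StringExpr.rightTrimAndTruncate.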
case VARCHAR:
{
final BytesColumnVector bytesColVector =
((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
// TODO: maybe not create writable for POJO case
final HiveVarcharWritable hiveVarcharWritable =
(HiveVarcharWritable) VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
if (bytes == null || length == 0) {
if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
hiveVarcharWritable.set(EMPTY_STRING, -1);
} else {
final int adjustedLength =
StringExpr.truncate(
bytes, start, length, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
if (adjustedLength == 0) {
hiveVarcharWritable.set(EMPTY_STRING, -1);
} else {
hiveVarcharWritable.set(
new String(bytes, start, adjustedLength, Charsets.UTF_8), -1);
}
}
if (outputType == OutputType.WRITABLES) {
return hiveVarcharWritable;
} else {
return hiveVarcharWritable.getHiveVarchar();
}
}
case CHAR:
{
final BytesColumnVector bytesColVector =
((BytesColumnVector) colVector);
final byte[] bytes = bytesColVector.vector[adjustedIndex];
final int start = bytesColVector.start[adjustedIndex];
final int length = bytesColVector.length[adjustedIndex];
// TODO: maybe not create writable for POJO case
final HiveCharWritable hiveCharWritable =
(HiveCharWritable) VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
final int maxLength = ((CharTypeInfo) primitiveTypeInfo).getLength();
if (bytes == null || length == 0) {
if (length > 0) {
nullBytesReadError(primitiveCategory, batchIndex);
}
hiveCharWritable.set(EMPTY_STRING, maxLength);
} else {
final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, maxLength);
if (adjustedLength == 0) {
hiveCharWritable.set(EMPTY_STRING, maxLength);
} else {
hiveCharWritable.set(
new String(bytes, start, adjustedLength, Charsets.UTF_8), maxLength);
}
}
if (outputType == OutputType.WRITABLES) {
return hiveCharWritable;
} else {
return hiveCharWritable.getHiveChar();
}
}
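// Decimal values arrive either as scaled longs in a Decimal64ColumnVector or as full
// writable values in a DecimalColumnVector; deserialize64 rebuilds the decimal from the
// long using the vector's scale.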
case DECIMAL:
// The decimal conversion code is deep within HiveDecimalWritable, so creating one probably can't be avoided.
HiveDecimalWritable decimalWritable =
(HiveDecimalWritable) VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory);
if (colVector instanceof Decimal64ColumnVector) {
Decimal64ColumnVector dec64ColVector = (Decimal64ColumnVector) colVector;
decimalWritable.deserialize64(dec64ColVector.vector[adjustedIndex], dec64ColVector.scale);
} else {
// The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields.
decimalWritable.set(((DecimalColumnVector) colVector).vector[adjustedIndex]);
}
if (outputType == OutputType.WRITABLES) {
return decimalWritable;
} else {
return decimalWritable.getHiveDecimal();
}
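// INTERVAL_YEAR_MONTH is stored in the LongColumnVector as a total number of months.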
case INTERVAL_YEAR_MONTH:
HiveIntervalYearMonth hiveIntervalYearMonthValue =
new HiveIntervalYearMonth((int) ((LongColumnVector) colVector).vector[adjustedIndex]);
if (outputType == OutputType.WRITABLES) {
((HiveIntervalYearMonthWritable) primitiveWritable).set(hiveIntervalYearMonthValue);
return primitiveWritable;
} else {
return hiveIntervalYearMonthValue;
}
case INTERVAL_DAY_TIME:
HiveIntervalDayTime hiveIntervalDayTimeValue =
((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedIndex);
if (outputType == OutputType.WRITABLES) {
((HiveIntervalDayTimeWritable) primitiveWritable).set(hiveIntervalDayTimeValue);
return primitiveWritable;
} else {
return hiveIntervalDayTimeValue;
}
default:
throw new RuntimeException("Primitive category " + primitiveCategory.name() +
" not supported");
}
}
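// Complex types are laid out as offset/length slices over child vectors and are extracted
// element by element through the four-argument extractRowColumn overload. For example, with
// offsets[row] = 5 and lengths[row] = 3, the list below collects child[5], child[6] and child[7].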
case LIST:
{
final ListColumnVector listColumnVector = (ListColumnVector) colVector;
final ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
final ListObjectInspector listObjectInspector = (ListObjectInspector) objectInspector;
final int offset = (int) listColumnVector.offsets[adjustedIndex];
final int size = (int) listColumnVector.lengths[adjustedIndex];
final List<Object> list = new ArrayList<Object>();
for (int i = 0; i < size; i++) {
list.add(
extractRowColumn(
listColumnVector.child,
listTypeInfo.getListElementTypeInfo(),
listObjectInspector.getListElementObjectInspector(),
offset + i));
}
return list;
}
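// A MapColumnVector uses the same offsets/lengths layout over two parallel child vectors;
// entry i of the row pairs keys[offset + i] with values[offset + i], and the LinkedHashMap
// preserves that stored order.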
case MAP:
{
final MapColumnVector mapColumnVector = (MapColumnVector) colVector;
final MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
final MapObjectInspector mapObjectInspector = (MapObjectInspector) objectInspector;
final int offset = (int) mapColumnVector.offsets[adjustedIndex];
final int size = (int) mapColumnVector.lengths[adjustedIndex];
final Map<Object, Object> map = new LinkedHashMap<Object, Object>();
for (int i = 0; i < size; i++) {
final Object key = extractRowColumn(
mapColumnVector.keys,
mapTypeInfo.getMapKeyTypeInfo(),
mapObjectInspector.getMapKeyObjectInspector(),
offset + i);
final Object value = extractRowColumn(
mapColumnVector.values,
mapTypeInfo.getMapValueTypeInfo(),
mapObjectInspector.getMapValueObjectInspector(),
offset + i);
map.put(key, value);
}
return map;
}
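// A StructColumnVector holds one child vector per field, all indexed by the same row,
// so each field is extracted at adjustedIndex and set on a fresh struct object.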
case STRUCT:
{
final StructColumnVector structColumnVector = (StructColumnVector) colVector;
final StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
final StandardStructObjectInspector structInspector =
(StandardStructObjectInspector) objectInspector;
final List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
final int size = fieldTypeInfos.size();
final List<? extends StructField> structFields =
structInspector.getAllStructFieldRefs();
final Object struct = structInspector.create();
for (int i = 0; i < size; i++) {
final StructField structField = structFields.get(i);
final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i);
final Object value = extractRowColumn(
structColumnVector.fields[i],
fieldTypeInfo,
structField.getFieldObjectInspector(),
adjustedIndex);
structInspector.setStructFieldData(struct, structField, value);
}
return struct;
}
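// A UnionColumnVector stores a per-row tag that selects which of its field vectors holds
// the value; only that child is extracted and wrapped in a StandardUnion.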
case UNION:
{
final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
final List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
final UnionObjectInspector unionInspector = (UnionObjectInspector) objectInspector;
final List<ObjectInspector> unionInspectors = unionInspector.getObjectInspectors();
final UnionColumnVector unionColumnVector = (UnionColumnVector) colVector;
final byte tag = (byte) unionColumnVector.tags[adjustedIndex];
final Object object = extractRowColumn(
unionColumnVector.fields[tag],
objectTypeInfos.get(tag),
unionInspectors.get(tag),
adjustedIndex);
final StandardUnion standardUnion = new StandardUnion();
standardUnion.setTag(tag);
standardUnion.setObject(object);
return standardUnion;
}
default:
throw new RuntimeException("Category " + category.name() + " not supported");
}
}