in be/src/exec/orc/orc-metadata-utils.cc [463:533]
// Checks whether values of the ORC primitive type 'orc_type' may be read into a table
// column of type 'type'.
//
// Returns Status::OK() when the pair is accepted. Otherwise returns an error Status:
//  - a "can't be truncated" error for a DECIMAL whose scale matches but whose ORC
//    precision requires a wider storage size than the table column provides;
//  - a "Type mismatch" error for every other rejected combination.
// Both messages include the ORC type, the table column type and 'filename_'.
Status OrcSchemaResolver::ValidatePrimitiveType(const ColumnType& type,
    const orc::Type& orc_type) const {
  const auto target = type.type;
  // Set by the matching case below; stays false for unlisted ORC kinds.
  bool compatible = false;
  switch (orc_type.getKind()) {
    case orc::TypeKind::BOOLEAN:
      compatible = (target == TYPE_BOOLEAN);
      break;
    // Integer kinds: the table column must be the same width or wider.
    case orc::TypeKind::BYTE:
      compatible = (target == TYPE_TINYINT || target == TYPE_SMALLINT
          || target == TYPE_INT || target == TYPE_BIGINT);
      break;
    case orc::TypeKind::SHORT:
      compatible = (target == TYPE_SMALLINT || target == TYPE_INT
          || target == TYPE_BIGINT);
      break;
    case orc::TypeKind::INT:
      compatible = (target == TYPE_INT || target == TYPE_BIGINT);
      break;
    case orc::TypeKind::LONG:
      compatible = (target == TYPE_BIGINT);
      break;
    // Either floating point kind is accepted for either floating point column type.
    case orc::TypeKind::FLOAT:
    case orc::TypeKind::DOUBLE:
      compatible = (target == TYPE_FLOAT || target == TYPE_DOUBLE);
      break;
    // ORC BINARY is validated like the string kinds; TYPE_BINARY is not used here.
    case orc::TypeKind::STRING:
    case orc::TypeKind::VARCHAR:
    case orc::TypeKind::CHAR:
    case orc::TypeKind::BINARY:
      compatible = (target == TYPE_STRING || target == TYPE_VARCHAR
          || target == TYPE_CHAR);
      break;
    case orc::TypeKind::TIMESTAMP:
    case orc::TypeKind::TIMESTAMP_INSTANT:
      compatible = (target == TYPE_TIMESTAMP);
      break;
    case orc::TypeKind::DATE:
      compatible = (target == TYPE_DATE);
      break;
    case orc::TypeKind::DECIMAL: {
      // A non-decimal column or a differing scale is reported as a plain type mismatch.
      if (target != TYPE_DECIMAL || type.scale != orc_type.getScale()) break;
      const int orc_precision = orc_type.getPrecision();
      // Impala decimals are stored in 4, 8 or 16 bytes. Decide whether the table
      // column's storage size is wide enough for the ORC precision.
      bool fits = true;
      if (orc_precision == 0 || orc_precision > ColumnType::MAX_DECIMAL8_PRECISION) {
        // Precision above 18 does not fit into an int64 (10^19 > 2^63), so only the
        // 16-byte representation is acceptable. A precision of 0 is treated the same.
        fits = (type.GetByteSize() == 16);
      } else if (orc_precision > ColumnType::MAX_DECIMAL4_PRECISION) {
        // Precision in (9, 18] fits into 8 or 16 bytes, but not into 4.
        fits = (type.GetByteSize() != 4);
      }
      if (fits) return Status::OK();
      return Status(Substitute(
          "Column $0 in ORC file '$1' can't be truncated to table column $2",
          orc_type.toString(), filename_, type.DebugString()));
    }
    default:
      break;
  }
  if (compatible) return Status::OK();
  return Status(Substitute(
      "Type mismatch: table column $0 is map to column $1 in ORC file '$2'",
      type.DebugString(), orc_type.toString(), filename_));
}