in c++/src/SchemaEvolution.cc [172:254]
void SchemaEvolution::buildSafePPDConversionMap(const Type* readType, const Type* fileType) {
if (readType == nullptr || !isPrimitive(readType) || fileType == nullptr ||
!isPrimitive(fileType)) {
return;
}
bool isSafe = false;
if (readType == fileType) {
// short cut for same type
isSafe = true;
} else if (readType->getKind() == DECIMAL && fileType->getKind() == DECIMAL) {
// for decimals alone do equality check to not mess up with precision change
if (fileType->getPrecision() == readType_->getPrecision() &&
fileType->getScale() == readType_->getScale()) {
isSafe = true;
}
} else {
// only integer and string evolutions are safe
// byte -> short -> int -> long
// string <-> char <-> varchar
// NOTE: Float to double evolution is not safe as floats are stored as
// doubles in ORC's internal index, but when doing predicate evaluation
// for queries like "select * from orc_float where f = 74.72" the constant
// on the filter is converted from string -> double so the precisions will
// be different and the comparison will fail.
// Soon, we should convert all sargs that compare equality between floats
// or doubles to range predicates.
// Similarly string -> char and varchar -> char and vice versa is impossible
// as ORC stores char with padded spaces in its internal index.
switch (fileType->getKind()) {
case BYTE: {
if (readType_->getKind() == SHORT || readType_->getKind() == INT ||
readType_->getKind() == LONG) {
isSafe = true;
}
break;
}
case SHORT: {
if (readType_->getKind() == INT || readType_->getKind() == LONG) {
isSafe = true;
}
break;
}
case INT: {
if (readType_->getKind() == LONG) {
isSafe = true;
}
break;
}
case STRING: {
if (readType_->getKind() == VARCHAR) {
isSafe = true;
}
break;
}
case VARCHAR: {
if (readType_->getKind() == STRING) {
isSafe = true;
}
break;
}
case BOOLEAN:
case LONG:
case FLOAT:
case DOUBLE:
case BINARY:
case TIMESTAMP:
case LIST:
case MAP:
case STRUCT:
case UNION:
case DECIMAL:
case DATE:
case CHAR:
case TIMESTAMP_INSTANT:
break;
}
}
if (isSafe) {
safePPDConversionMap_.insert(fileType->getColumnId());
}
}