void SchemaEvolution::buildSafePPDConversionMap()

in c++/src/SchemaEvolution.cc [172:254]


  void SchemaEvolution::buildSafePPDConversionMap(const Type* readType, const Type* fileType) {
    if (readType == nullptr || !isPrimitive(readType) || fileType == nullptr ||
        !isPrimitive(fileType)) {
      return;
    }

    bool isSafe = false;
    if (readType == fileType) {
      // short cut for same type
      isSafe = true;
    } else if (readType->getKind() == DECIMAL && fileType->getKind() == DECIMAL) {
      // for decimals alone do equality check to not mess up with precision change
      if (fileType->getPrecision() == readType_->getPrecision() &&
          fileType->getScale() == readType_->getScale()) {
        isSafe = true;
      }
    } else {
      // only integer and string evolutions are safe
      // byte -> short -> int -> long
      // string <-> char <-> varchar
      // NOTE: Float to double evolution is not safe as floats are stored as
      // doubles in ORC's internal index, but when doing predicate evaluation
      // for queries like "select * from orc_float where f = 74.72" the constant
      // on the filter is converted from string -> double so the precisions will
      // be different and the comparison will fail.
      // Soon, we should convert all sargs that compare equality between floats
      // or doubles to range predicates.
      // Similarly string -> char and varchar -> char and vice versa is impossible
      // as ORC stores char with padded spaces in its internal index.
      switch (fileType->getKind()) {
        case BYTE: {
          if (readType_->getKind() == SHORT || readType_->getKind() == INT ||
              readType_->getKind() == LONG) {
            isSafe = true;
          }
          break;
        }
        case SHORT: {
          if (readType_->getKind() == INT || readType_->getKind() == LONG) {
            isSafe = true;
          }
          break;
        }
        case INT: {
          if (readType_->getKind() == LONG) {
            isSafe = true;
          }
          break;
        }
        case STRING: {
          if (readType_->getKind() == VARCHAR) {
            isSafe = true;
          }
          break;
        }
        case VARCHAR: {
          if (readType_->getKind() == STRING) {
            isSafe = true;
          }
          break;
        }
        case BOOLEAN:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case BINARY:
        case TIMESTAMP:
        case LIST:
        case MAP:
        case STRUCT:
        case UNION:
        case DECIMAL:
        case DATE:
        case CHAR:
        case TIMESTAMP_INSTANT:
          break;
      }
    }

    if (isSafe) {
      safePPDConversionMap_.insert(fileType->getColumnId());
    }
  }