Expr::PeelEncodingsResult Expr::peelEncodings()

in velox/expression/Expr.cpp [398:543]


// Tries to strip ("peel") the wrappers shared by the fields in
// distinctFields_ so that the expression can be evaluated over the wrapped
// value vectors instead of the wrappers.
//
// Each pass of the do/while loop attempts to remove one wrapper level. A level
// is removed only if every non-constant field is DICTIONARY-encoded with the
// same wrapInfo() buffer, or every non-constant field is SEQUENCE-encoded with
// the same wrapInfo() buffer. Fields that are constant over 'rows' (checked on
// the first pass only) are carried along unchanged and re-wrapped as constants
// at the end.
//
// On success this calls context->saveAndReset(saver, rows), records the
// wrapping on the context (setConstantWrap() or setDictionaryWrapping()) and
// installs the peeled vectors with context->setPeeled().
//
// @param context Evaluation context whose fields are inspected and updated.
// @param saver Passed to context->saveAndReset() before the context is
// modified.
// @param rows Rows of interest in the un-peeled (outer) vectors.
// @param localDecoded Holder of the DecodedVector used to combine the peeled
// wrapper levels.
// @param newRowsHolder Holder backing the returned 'newRows'.
// @param finalRowsHolder Holder backing the returned 'newFinalSelection'.
// @return PeelEncodingsResult::empty() if the context's wrap encoding is
// CONSTANT or if no level could be peeled while at least one field is
// non-constant. Otherwise {newRows, newFinalSelection, mayCache}, where
// newFinalSelection is nullptr unless it was translated (peeled path with
// !context->isFinalSelection()), and mayCache is true when exactly one
// non-constant field was peeled and no field was constant.
Expr::PeelEncodingsResult Expr::peelEncodings(
    EvalCtx* context,
    ContextSaver* saver,
    const SelectivityVector& rows,
    LocalDecodedVector& localDecoded,
    LocalSelectivityVector& newRowsHolder,
    LocalSelectivityVector& finalRowsHolder) {
  if (context->wrapEncoding() == VectorEncoding::Simple::CONSTANT) {
    return Expr::PeelEncodingsResult::empty();
  }
  // Result of the last fully successful pass, indexed by field index.
  std::vector<VectorPtr> peeledVectors;
  // Scratch for the pass in progress; promoted to 'peeledVectors' on success.
  std::vector<VectorPtr> maybePeeled;
  // Empty until the first constant field is seen; then sized to 'numFields'
  // with true at the index of every constant field.
  std::vector<bool> constantFields;
  int numLevels = 0;
  bool peeled;
  bool nonConstant = false;
  auto numFields = context->row()->childrenSize();
  // Index of the first field from which a wrapper was peeled, or -1 if none.
  int32_t firstPeeled = -1;
  do {
    peeled = true;
    // Wrap info shared by all fields peeled in this pass. At most one of the
    // two is set, since dictionary and sequence wrappers cannot be mixed.
    BufferPtr firstIndices;
    BufferPtr firstLengths;
    for (const auto& field : distinctFields_) {
      auto fieldIndex = field->index(context);
      assert(fieldIndex >= 0 && fieldIndex < numFields);
      // First pass looks at the context's fields, later passes at the vectors
      // produced by the previous pass.
      auto leaf = peeledVectors.empty() ? context->getField(fieldIndex)
                                        : peeledVectors[fieldIndex];
      if (!constantFields.empty() && constantFields[fieldIndex]) {
        // Already classified as constant: carry it through unchanged.
        setPeeled(leaf, fieldIndex, context, maybePeeled);
        continue;
      }
      // Constness is evaluated against the outer 'rows', so it is only checked
      // before any level has been peeled.
      if (numLevels == 0 && leaf->isConstant(rows)) {
        setPeeled(leaf, fieldIndex, context, maybePeeled);
        constantFields.resize(numFields);
        constantFields.at(fieldIndex) = true;
        continue;
      }
      nonConstant = true;
      auto encoding = leaf->encoding();
      if (encoding == VectorEncoding::Simple::DICTIONARY) {
        if (firstLengths) {
          // having a mix of dictionary and sequence encoded fields
          peeled = false;
          break;
        }
        if (!propagatesNulls_ && leaf->rawNulls()) {
          // A dictionary that adds nulls over an Expr that is not null for a
          // null argument cannot be peeled.
          peeled = false;
          break;
        }
        BufferPtr indices = leaf->wrapInfo();
        if (!firstIndices) {
          firstIndices = std::move(indices);
        } else if (indices != firstIndices) {
          // different fields use different dictionaries
          peeled = false;
          break;
        }
        if (firstPeeled == -1) {
          firstPeeled = fieldIndex;
        }
        setPeeled(leaf->valueVector(), fieldIndex, context, maybePeeled);
      } else if (encoding == VectorEncoding::Simple::SEQUENCE) {
        if (firstIndices) {
          // having a mix of dictionary and sequence encoded fields
          peeled = false;
          break;
        }
        BufferPtr lengths = leaf->wrapInfo();
        if (!firstLengths) {
          firstLengths = std::move(lengths);
        } else if (lengths != firstLengths) {
          // different fields use different sequences
          peeled = false;
          break;
        }
        if (firstPeeled == -1) {
          firstPeeled = fieldIndex;
        }
        setPeeled(leaf->valueVector(), fieldIndex, context, maybePeeled);
      } else {
        // Non-peelable encoding.
        peeled = false;
        break;
      }
    }
    if (peeled) {
      ++numLevels;
      peeledVectors = std::move(maybePeeled);
      // Do not rely on the moved-from state: the next pass must start from an
      // empty scratch vector.
      maybePeeled.clear();
    }
    // Keep going while a whole level was peeled and there is at least one
    // non-constant field that may carry further wrappers.
  } while (peeled && nonConstant);

  if (numLevels == 0 && nonConstant) {
    return Expr::PeelEncodingsResult::empty();
  }

  // We peel off the wrappers and make a new selection.
  // Both pointers start as nullptr so that the returned struct never carries
  // an indeterminate value; 'newFinalSelection' is assigned only on the peeled
  // path when the context is not at its final selection.
  SelectivityVector* newRows = nullptr;
  SelectivityVector* newFinalSelection = nullptr;
  if (firstPeeled == -1) {
    // All the fields are constant across the rows of interest.
    newRows = singleRow(newRowsHolder, rows.begin());
    context->saveAndReset(saver, rows);
    context->setConstantWrap(rows.begin());
  } else {
    auto decoded = localDecoded.get();
    auto firstWrapper = context->getField(firstPeeled).get();
    const auto& rowsToDecode =
        context->isFinalSelection() ? rows : *context->finalSelection();
    // Combine 'numLevels' wrapper levels of the first peeled field.
    decoded->makeIndices(*firstWrapper, rowsToDecode, numLevels);
    // The result is not used below; the call is kept in case indices() has
    // side effects on 'decoded' (TODO confirm against DecodedVector).
    auto indices = decoded->indices();

    newRows = translateToInnerRows(rows, *decoded, newRowsHolder);

    if (!context->isFinalSelection()) {
      newFinalSelection = translateToInnerRows(
          *context->finalSelection(), *decoded, finalRowsHolder);
    }

    // The translations above read the context's current final selection, so
    // they must happen before the context is saved and reset.
    context->saveAndReset(saver, rows);

    if (!context->isFinalSelection()) {
      *context->mutableFinalSelection() = newFinalSelection;
    }

    setDictionaryWrapping(*decoded, rows, *firstWrapper, context);
  }
  int numPeeled = 0;
  const auto numPeeledVectors = static_cast<int>(peeledVectors.size());
  for (int i = 0; i < numPeeledVectors; ++i) {
    auto& values = peeledVectors[i];
    if (!values) {
      // Slot for a field this expression does not reference.
      continue;
    }
    if (!constantFields.empty() && constantFields[i]) {
      // NOTE(review): the constant is sized by the outer 'rows' although
      // evaluation continues over the inner rows — confirm this is sufficient
      // when the inner vectors are longer than 'rows'.
      context->setPeeled(
          i, BaseVector::wrapInConstant(rows.size(), rows.begin(), values));
    } else {
      context->setPeeled(i, values);
      ++numPeeled;
    }
  }
  // If the expression depends on one dictionary, results are cacheable.
  bool mayCache = numPeeled == 1 && constantFields.empty();
  return {newRows, newFinalSelection, mayCache};
}