bool nonNullRowsFromSparse()

in velox/dwio/dwrf/common/DecoderUtil.cpp [54:156]


bool nonNullRowsFromSparse(
    const uint64_t* nulls,
    RowSet rows,
    raw_vector<int32_t>& innerRows,
    raw_vector<int32_t>& outerRows,
    uint64_t* resultNulls,
    int32_t& tailSkip) {
  constexpr int32_t kStep = 8;
  bool anyNull = false;
  auto numIn = rows.size();
  innerRows.resize(numIn);
  outerRows.resize(numIn);
  auto inner = innerRows.data();
  auto outer = outerRows.data();
  int32_t numNulls = 0;
  int32_t numInner = 0;
  int32_t lastNonNull = -1;
  auto resultNullBytes = reinterpret_cast<uint8_t*>(resultNulls);

  // Returns the index in terms of non-null rows for
  // 'rows[i]'. Assumes that i is increasing between calls.
  auto innerFor = [&](int32_t i) {
    auto row = rows[i];
    DCHECK_GT(row, lastNonNull);
    auto skip = row - lastNonNull - 1;
    if (!skip) {
      // Consecutive non-nulls
    } else if (skip < 56) {
      auto bits = loadUpTo56Bits(nulls, lastNonNull + 1, skip);
      numNulls += skip - __builtin_popcountll(bits);
    } else {
      numNulls += skip -
          bits::countBits(nulls, lastNonNull + 1, lastNonNull + skip + 1);
    }
    lastNonNull = row;
    return row - numNulls;
  };
  for (auto i = 0; i < numIn; i += kStep) {
    int32_t width = i + kStep > numIn ? numIn - i : kStep;
    int16_t widthMask = bits::lowMask(width);
    if (isDense(rows.data() + i, width)) {
      uint16_t flags = load8Bits(nulls, rows[i]) & widthMask;
      if (outputNulls) {
        resultNullBytes[i / 8] = flags;
        anyNull |= flags != widthMask;
      }
      if (!flags) {
        continue;
      }
      auto numNonNull = __builtin_popcount(flags);
      auto setBits = V32::compareSetBits(flags);
      // contiguous inner rows.
      auto innerStart = innerFor(i);
      V32::store(inner + numInner, V32::iota() + innerStart);
      if (isFilter) {
        simd::storePermute(
            outer + numInner, V32::load(rows.data() + i), setBits);
      } else {
        V32::store(outer + numInner, setBits + i);
      }
      // We processed 'width' consecutive. The inner count is incremented
      // by number of non-nulls. Nulls are counted for the 'width' rows,
      // so we set lastNonNull to be the last of these and increment
      // numNulls by the number of nulls in the 8 rows. This is
      // correct even if the last of the 8 is null since the null
      // count is correct up to that row.
      numInner += numNonNull;
      lastNonNull = rows[i + width - 1];
      numNulls += width - numNonNull;
    } else {
      auto next8Rows = V32::load(rows.data() + i);
      uint16_t flags = simd::gather8Bits(nulls, next8Rows, width);
      if (outputNulls) {
        resultNullBytes[i / 8] = flags;
        anyNull |= flags != widthMask;
      }
      if (!flags) {
        continue;
      }
      auto setBits = V32::compareSetBits(flags);
      if (isFilter) {
        // The non-null indices among 'rows' are possible filter results.
        simd::storePermute(outer + numInner, next8Rows, setBits);
      } else {
        // Make scatter indices so that there are gaps for 'rows' that hit a
        // null.
        V32::store(outer + numInner, setBits + i);
      }
      // Calculate the inner row corresponding to each non-null in 'next8Rows'.
      while (flags) {
        int32_t index = bits::getAndClearLastSetBit(flags);
        inner[numInner++] = innerFor(i + index);
      }
    }
  }
  innerRows.resize(numInner);
  outerRows.resize(numInner);
  // If ending with a null, skip the non-nulls between last non-null in
  // rows and the last null in 'rows'.
  tailSkip = bits::countBits(nulls, lastNonNull + 1, rows.back());

  return anyNull;
}