void SelectiveStringDictionaryColumnReader::read()

in velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.cpp [208:291]


// Reads the dictionary-encoded string values selected by 'rows' and routes
// them, according to scanSpec_, to one of three paths:
//   - a value hook (keepValues() and valueHook() are both set),
//   - a filter pass that extracts values into this reader (keepValues() only),
//   - a filter pass that drops the values (keepValues() is false).
//
// 'offset', 'rows' and 'incomingNulls' are forwarded unchanged to
// prepareRead<int32_t>(). rows.back() is read right after that call, so
// callers presumably always pass a non-empty selection.
void SelectiveStringDictionaryColumnReader::read(
    vector_size_t offset,
    RowSet rows,
    const uint64_t* incomingNulls) {
  // One-byte placeholder used as the stride dictionary blob when the real
  // blob pointer is null, so that consumers never see a null base address.
  static std::array<char, 1> EMPTY_DICT;

  prepareRead<int32_t>(offset, rows, incomingNulls);
  // The selection is treated as dense when its last row number equals its
  // size minus one.
  const bool isDense = rows.back() == rows.size() - 1;
  const auto* nullsPtr =
      nullsInReadRange_ ? nullsInReadRange_->as<uint64_t>() : nullptr;
  // Lazily load dictionary data on first use.
  ensureInitialized();

  if (inDictionaryReader_) {
    const auto end = rows.back() + 1;
    const bool isBulk = useBulkPath();
    // On the bulk path with nulls present only the non-null positions carry
    // an in-dictionary flag; otherwise one flag is read per row up to 'end'.
    int32_t numFlags =
        (isBulk && nullsPtr) ? bits::countNonNulls(nullsPtr, 0, end) : end;
    detail::ensureCapacity<uint64_t>(
        inDict_, bits::nwords(numFlags), &memoryPool_);
    inDictionaryReader_->next(
        inDict_->asMutable<char>(), numFlags, isBulk ? nullptr : nullsPtr);
    loadStrideDictionary();
    if (strideDict_) {
      DWIO_ENSURE_NOT_NULL(strideDictOffset_);

      // The stride dictionary blob may be null when the stride dictionary
      // contains only the empty string. In that case the last entry of
      // strideDictOffset_ must be 0.
      if (!strideDict_->as<char>()) {
        DWIO_ENSURE_EQ(strideDictOffset_->as<int64_t>()[strideDictCount_], 0);
      }
    }
  }

  // Effective stride dictionary blob: null without a stride dictionary, the
  // real blob when it has one, and EMPTY_DICT when the stride dictionary
  // exists but its blob pointer is null. Previously the placeholder was
  // computed into a local that was never read, so the hook path could still
  // receive a null blob.
  const char* strideDictBlob = nullptr;
  if (strideDict_) {
    strideDictBlob = strideDict_->as<char>();
    if (!strideDictBlob) {
      strideDictBlob = EMPTY_DICT.data();
    }
  }

  if (scanSpec_->keepValues()) {
    if (scanSpec_->valueHook()) {
      // Single place that builds the hook extractor so the dense and
      // non-dense calls below cannot diverge. Returned as a prvalue, so it is
      // constructed directly in readHelper's argument.
      auto makeHookExtractor = [&]() {
        return ExtractStringDictionaryToGenericHook(
            scanSpec_->valueHook(),
            rows,
            (strideDict_ && inDict_) ? inDict_->as<uint64_t>() : nullptr,
            dictionaryBlob_->as<char>(),
            dictionaryOffset_->as<uint64_t>(),
            dictionaryCount_,
            strideDictBlob,
            strideDictOffset_ ? strideDictOffset_->as<uint64_t>() : nullptr);
      };
      if (isDense) {
        readHelper<common::AlwaysTrue, true>(
            &alwaysTrue(), rows, makeHookExtractor());
      } else {
        readHelper<common::AlwaysTrue, false>(
            &alwaysTrue(), rows, makeHookExtractor());
      }
      return;
    }
    if (isDense) {
      processFilter<true>(scanSpec_->filter(), rows, ExtractToReader(this));
    } else {
      processFilter<false>(scanSpec_->filter(), rows, ExtractToReader(this));
    }
  } else {
    if (isDense) {
      processFilter<true>(scanSpec_->filter(), rows, DropValues());
    } else {
      processFilter<false>(scanSpec_->filter(), rows, DropValues());
    }
  }
}