void StringDictionaryColumnReader::readDictionaryVector()

in velox/dwio/dwrf/reader/ColumnReader.cpp [1102:1233]


void StringDictionaryColumnReader::readDictionaryVector(
    uint64_t numValues,
    VectorPtr& result,
    const uint64_t* incomingNulls) {
  auto dictVector =
      detail::resetIfWrongVectorType<DictionaryVector<StringView>>(result);
  BufferPtr indices;
  if (dictVector) {
    indices = dictVector->mutableIndices(numValues);
  }

  BufferPtr nulls = readNulls(numValues, result, incomingNulls);
  const auto* nullsPtr = nulls ? nulls->as<uint64_t>() : nullptr;
  uint64_t nullCount = nullsPtr ? bits::countNulls(nullsPtr, 0, numValues) : 0;

  if (result) {
    detail::resetIfNotWritable(result, indices);
  }
  if (!indices) {
    indices = AlignedBuffer::allocate<vector_size_t>(numValues, &memoryPool_);
  }

  auto indicesPtr = indices->asMutable<vector_size_t>();
  dictIndex->nextInts(indicesPtr, numValues, nullsPtr);
  indices->setSize(numValues * sizeof(vector_size_t));

  bool hasStrideDict = false;

  // load inDictionary
  const char* inDictPtr = nullptr;
  if (inDictionaryReader) {
    detail::ensureCapacity<bool>(inDict, numValues, &memoryPool_);
    inDictionaryReader->next(inDict->asMutable<char>(), numValues, nullsPtr);
    inDictPtr = inDict->as<char>();
  }

  if (nulls) {
    for (uint64_t i = 0; i < numValues; ++i) {
      if (!bits::isBitNull(nullsPtr, i)) {
        if (!inDictPtr || bits::isBitSet(inDictPtr, i)) {
          // points to an entry in rowgroup dictionary
        } else {
          // points to an entry in stride dictionary
          indicesPtr[i] += dictionaryCount;
          hasStrideDict = true;
        }
      }
    }
  } else {
    for (uint64_t i = 0; i < numValues; ++i) {
      if (!inDictPtr || bits::isBitSet(inDictPtr, i)) {
        // points to an entry in rowgroup dictionary
      } else {
        // points to an entry in stride dictionary
        indicesPtr[i] += dictionaryCount;
        hasStrideDict = true;
      }
    }
  }

  VectorPtr dictionaryValues;
  const auto* dictionaryBlobPtr = dictionaryBlob->as<char>();
  const auto* dictionaryOffsetsPtr = dictionaryOffset->as<int64_t>();
  if (hasStrideDict) {
    if (!combinedDictionaryValues_) {
      // TODO Reuse memory
      BufferPtr values = AlignedBuffer::allocate<StringView>(
          dictionaryCount + strideDictCount, &memoryPool_);
      auto* valuesPtr = values->asMutable<StringView>();
      for (size_t i = 0; i < dictionaryCount; i++) {
        valuesPtr[i] = StringView(
            dictionaryBlobPtr + dictionaryOffsetsPtr[i],
            dictionaryOffsetsPtr[i + 1] - dictionaryOffsetsPtr[i]);
      }

      const auto* strideDictPtr = strideDict->as<char>();
      const auto* strideDictOffsetPtr = strideDictOffset->as<int64_t>();
      for (size_t i = 0; i < strideDictCount; i++) {
        valuesPtr[dictionaryCount + i] = StringView(
            strideDictPtr + strideDictOffsetPtr[i],
            strideDictOffsetPtr[i + 1] - strideDictOffsetPtr[i]);
      }

      combinedDictionaryValues_ = std::make_shared<FlatVector<StringView>>(
          &memoryPool_,
          nodeType_->type,
          BufferPtr(nullptr), // TODO nulls
          dictionaryCount + strideDictCount /*length*/,
          values,
          std::vector<BufferPtr>{dictionaryBlob, strideDict});
    }

    dictionaryValues = combinedDictionaryValues_;
  } else {
    if (!dictionaryValues_) {
      // TODO Reuse memory
      BufferPtr values =
          AlignedBuffer::allocate<StringView>(dictionaryCount, &memoryPool_);
      auto* valuesPtr = values->asMutable<StringView>();
      for (size_t i = 0; i < dictionaryCount; i++) {
        valuesPtr[i] = StringView(
            dictionaryBlobPtr + dictionaryOffsetsPtr[i],
            dictionaryOffsetsPtr[i + 1] - dictionaryOffsetsPtr[i]);
      }

      dictionaryValues_ = std::make_shared<FlatVector<StringView>>(
          &memoryPool_,
          nodeType_->type,
          BufferPtr(nullptr), // TODO nulls
          dictionaryCount /*length*/,
          values,
          std::vector<BufferPtr>{dictionaryBlob});
    }
    dictionaryValues = dictionaryValues_;
  }

  if (result) {
    result->setSize(numValues);
    result->setNullCount(nullCount);
    result->as<DictionaryVector<StringView>>()->setDictionaryValues(
        dictionaryValues);
  } else {
    result = std::make_shared<DictionaryVector<StringView>>(
        &memoryPool_,
        nulls,
        numValues,
        dictionaryValues,
        TypeKind::INTEGER,
        indices);
    result->setNullCount(nullCount);
  }
}