void StringDictionaryColumnReader::readFlatVector()

in velox/dwio/dwrf/reader/ColumnReader.cpp [1235:1333]


void StringDictionaryColumnReader::readFlatVector(
    uint64_t numValues,
    VectorPtr& result,
    const uint64_t* incomingNulls) {
  auto flatVector = resetIfWrongFlatVectorType<StringView>(result);
  BufferPtr data;
  if (flatVector) {
    data = flatVector->mutableValues(numValues);
  }

  BufferPtr nulls = readNulls(numValues, result, incomingNulls);
  const auto* nullsPtr = nulls ? nulls->as<uint64_t>() : nullptr;
  uint64_t nullCount = nullsPtr ? bits::countNulls(nullsPtr, 0, numValues) : 0;

  if (result) {
    detail::resetIfNotWritable(result, data);
  }
  if (!data) {
    data = AlignedBuffer::allocate<StringView>(numValues, &memoryPool_);
  }

  // load inDictionary
  const char* inDictPtr = nullptr;
  if (inDictionaryReader) {
    detail::ensureCapacity<bool>(inDict, numValues, &memoryPool_);
    inDictionaryReader->next(inDict->asMutable<char>(), numValues, nullsPtr);
    inDictPtr = inDict->as<char>();
  }
  auto dataPtr = data->asMutable<StringView>();

  // read indices
  if (!indices_ || indices_->capacity() < numValues * sizeof(int64_t)) {
    indices_ = AlignedBuffer::allocate<int64_t>(numValues, &memoryPool_);
  }
  auto indices = indices_->asMutable<int64_t>();
  dictIndex->next(indices, numValues, nullsPtr);

  const char* strideDictPtr = nullptr;
  int64_t* strideDictOffsetPtr = nullptr;
  if (strideDict) {
    strideDictPtr = strideDict->as<char>();
    strideDictOffsetPtr = strideDictOffset->asMutable<int64_t>();
  }
  auto* dictionaryBlobPtr = dictionaryBlob->as<char>();
  auto* dictionaryOffsetsPtr = dictionaryOffset->asMutable<int64_t>();
  bool hasStrideDict = false;
  const char* strData;
  int64_t strLen;
  if (nulls) {
    for (uint64_t i = 0; i < numValues; ++i) {
      if (!bits::isBitNull(nullsPtr, i)) {
        hasStrideDict = setOutput(
                            i,
                            indices[i],
                            dictionaryBlobPtr,
                            dictionaryOffsetsPtr,
                            strideDictPtr,
                            strideDictOffsetPtr,
                            inDictPtr,
                            strData,
                            strLen) ||
            hasStrideDict;
        dataPtr[i] = StringView{strData, static_cast<uint32_t>(strLen)};
      }
    }
  } else {
    for (uint64_t i = 0; i < numValues; ++i) {
      hasStrideDict = setOutput(
                          i,
                          indices[i],
                          dictionaryBlobPtr,
                          dictionaryOffsetsPtr,
                          strideDictPtr,
                          strideDictOffsetPtr,
                          inDictPtr,
                          strData,
                          strLen) ||
          hasStrideDict;
      dataPtr[i] = StringView{strData, static_cast<uint32_t>(strLen)};
    }
  }
  std::vector<BufferPtr> stringBuffers = {dictionaryBlob};
  if (hasStrideDict) {
    stringBuffers.emplace_back(strideDict);
  }
  if (result) {
    result->setSize(numValues);
    result->setNullCount(nullCount);
    flatVector->setStringBuffers(stringBuffers);
  } else {
    result = std::make_shared<FlatVector<StringView>>(
        &memoryPool_,
        nulls,
        numValues,
        data,
        std::vector<BufferPtr>{stringBuffers});
    result->setNullCount(nullCount);
  }
}