in velox/dwio/dwrf/reader/ColumnReader.cpp [1235:1333]
void StringDictionaryColumnReader::readFlatVector(
uint64_t numValues,
VectorPtr& result,
const uint64_t* incomingNulls) {
auto flatVector = resetIfWrongFlatVectorType<StringView>(result);
BufferPtr data;
if (flatVector) {
data = flatVector->mutableValues(numValues);
}
BufferPtr nulls = readNulls(numValues, result, incomingNulls);
const auto* nullsPtr = nulls ? nulls->as<uint64_t>() : nullptr;
uint64_t nullCount = nullsPtr ? bits::countNulls(nullsPtr, 0, numValues) : 0;
if (result) {
detail::resetIfNotWritable(result, data);
}
if (!data) {
data = AlignedBuffer::allocate<StringView>(numValues, &memoryPool_);
}
// load inDictionary
const char* inDictPtr = nullptr;
if (inDictionaryReader) {
detail::ensureCapacity<bool>(inDict, numValues, &memoryPool_);
inDictionaryReader->next(inDict->asMutable<char>(), numValues, nullsPtr);
inDictPtr = inDict->as<char>();
}
auto dataPtr = data->asMutable<StringView>();
// read indices
if (!indices_ || indices_->capacity() < numValues * sizeof(int64_t)) {
indices_ = AlignedBuffer::allocate<int64_t>(numValues, &memoryPool_);
}
auto indices = indices_->asMutable<int64_t>();
dictIndex->next(indices, numValues, nullsPtr);
const char* strideDictPtr = nullptr;
int64_t* strideDictOffsetPtr = nullptr;
if (strideDict) {
strideDictPtr = strideDict->as<char>();
strideDictOffsetPtr = strideDictOffset->asMutable<int64_t>();
}
auto* dictionaryBlobPtr = dictionaryBlob->as<char>();
auto* dictionaryOffsetsPtr = dictionaryOffset->asMutable<int64_t>();
bool hasStrideDict = false;
const char* strData;
int64_t strLen;
if (nulls) {
for (uint64_t i = 0; i < numValues; ++i) {
if (!bits::isBitNull(nullsPtr, i)) {
hasStrideDict = setOutput(
i,
indices[i],
dictionaryBlobPtr,
dictionaryOffsetsPtr,
strideDictPtr,
strideDictOffsetPtr,
inDictPtr,
strData,
strLen) ||
hasStrideDict;
dataPtr[i] = StringView{strData, static_cast<uint32_t>(strLen)};
}
}
} else {
for (uint64_t i = 0; i < numValues; ++i) {
hasStrideDict = setOutput(
i,
indices[i],
dictionaryBlobPtr,
dictionaryOffsetsPtr,
strideDictPtr,
strideDictOffsetPtr,
inDictPtr,
strData,
strLen) ||
hasStrideDict;
dataPtr[i] = StringView{strData, static_cast<uint32_t>(strLen)};
}
}
std::vector<BufferPtr> stringBuffers = {dictionaryBlob};
if (hasStrideDict) {
stringBuffers.emplace_back(strideDict);
}
if (result) {
result->setSize(numValues);
result->setNullCount(nullCount);
flatVector->setStringBuffers(stringBuffers);
} else {
result = std::make_shared<FlatVector<StringView>>(
&memoryPool_,
nulls,
numValues,
data,
std::vector<BufferPtr>{stringBuffers});
result->setNullCount(nullCount);
}
}