in velox/dwio/dwrf/reader/SelectiveStringDictionaryColumnReader.cpp [208:291]
// Reads the rows selected by 'rows' and hands the values either to the scan
// spec's value hook, to this reader (ExtractToReader) or to DropValues,
// depending on the scan spec.
//
// @param offset Forwarded to prepareRead() together with 'rows' and
//     'incomingNulls'.
// @param rows The selected row numbers. Must not be empty: rows.back() is
//     read unconditionally. NOTE(review): presumably ascending and relative to
//     'offset' - confirm against callers.
// @param incomingNulls Forwarded to prepareRead(); not used directly here.
void SelectiveStringDictionaryColumnReader::read(
    vector_size_t offset,
    RowSet rows,
    const uint64_t* incomingNulls) {
  prepareRead<int32_t>(offset, rows, incomingNulls);

  // The read is treated as dense when the last selected row number equals
  // the number of selected rows minus one.
  const bool isDense = rows.back() == rows.size() - 1;
  const auto* nullsPtr =
      nullsInReadRange_ ? nullsInReadRange_->as<uint64_t>() : nullptr;

  // Lazily load the dictionary data on first use.
  ensureInitialized();

  if (inDictionaryReader_) {
    const auto end = rows.back() + 1;
    const bool isBulk = useBulkPath();
    // On the bulk path with nulls present only the non-null positions in
    // [0, end) get a flag; otherwise one flag is read per position.
    const int32_t numFlags = (isBulk && nullsInReadRange_)
        ? bits::countNonNulls(nullsInReadRange_->as<uint64_t>(), 0, end)
        : end;
    detail::ensureCapacity<uint64_t>(
        inDict_, bits::nwords(numFlags), &memoryPool_);
    inDictionaryReader_->next(
        inDict_->asMutable<char>(), numFlags, isBulk ? nullptr : nullsPtr);
    loadStrideDictionary();
    if (strideDict_) {
      DWIO_ENSURE_NOT_NULL(strideDictOffset_);
      // The stride dictionary blob may be nullptr when the stride dictionary
      // only contains empty strings. That is valid only if the last entry of
      // strideDictOffset_ is 0, so verify it. (The previous code also stored a
      // placeholder address in a local that was never read; that dead store
      // and its backing static buffer have been removed.)
      if (!strideDict_->as<char>()) {
        DWIO_ENSURE_EQ(strideDictOffset_->as<int64_t>()[strideDictCount_], 0);
      }
    }
  }

  if (scanSpec_->keepValues()) {
    if (scanSpec_->valueHook()) {
      // Builds the hook-backed extractor. Shared by the dense and non-dense
      // calls below, which differ only in the readHelper template argument.
      // Returns a prvalue so readHelper receives a temporary, as before.
      const auto makeHookExtractor = [&]() {
        return ExtractStringDictionaryToGenericHook(
            scanSpec_->valueHook(),
            rows,
            (strideDict_ && inDict_) ? inDict_->as<uint64_t>() : nullptr,
            dictionaryBlob_->as<char>(),
            dictionaryOffset_->as<uint64_t>(),
            dictionaryCount_,
            strideDict_ ? strideDict_->as<char>() : nullptr,
            strideDictOffset_ ? strideDictOffset_->as<uint64_t>() : nullptr);
      };
      // With a value hook the scan spec's filter is not consulted; the
      // always-true filter is passed instead.
      if (isDense) {
        readHelper<common::AlwaysTrue, true>(
            &alwaysTrue(), rows, makeHookExtractor());
      } else {
        readHelper<common::AlwaysTrue, false>(
            &alwaysTrue(), rows, makeHookExtractor());
      }
      return;
    }
    // No hook: apply the scan spec's filter and extract through this reader.
    if (isDense) {
      processFilter<true>(scanSpec_->filter(), rows, ExtractToReader(this));
    } else {
      processFilter<false>(scanSpec_->filter(), rows, ExtractToReader(this));
    }
  } else {
    // Values are not kept: apply the filter and pass DropValues as extractor.
    if (isDense) {
      processFilter<true>(scanSpec_->filter(), rows, DropValues());
    } else {
      processFilter<false>(scanSpec_->filter(), rows, DropValues());
    }
  }
}