in velox/dwio/dwrf/reader/ColumnVisitors.h [1049:1141]
void processRun(
const int32_t* input,
int32_t numInput,
const int32_t* scatterRows,
int32_t* filterHits,
int32_t* values,
int32_t& numValues) {
DCHECK(input == values + numValues);
setByInDict(values + numValues, numInput);
if (!hasFilter) {
if (hasHook) {
for (auto i = 0; i < numInput; ++i) {
auto value = input[i];
super::values_.addValue(
scatterRows ? scatterRows[super::rowIndex_ + i]
: super::rowIndex_ + i,
value);
}
}
DCHECK_EQ(input, values + numValues);
if (scatter) {
scatterDense(input, scatterRows + super::rowIndex_, numInput, values);
}
numValues = scatter ? scatterRows[super::rowIndex_ + numInput - 1] + 1
: numValues + numInput;
super::rowIndex_ += numInput;
return;
}
constexpr bool filterOnly =
std::is_same<typename super::Extract, DropValues>::value;
constexpr int32_t kWidth = V32::VSize;
for (auto i = 0; i < numInput; i += kWidth) {
auto indices = V32::load(input + i);
V32::TV cache;
if (i + kWidth > numInput) {
cache = V32::maskGather32<1>(
V32::setAll(0),
V32::leadingMask(numInput - i),
DictSuper::filterCache_ - 3,
indices);
} else {
cache = V32::gather32<1>(DictSuper::filterCache_ - 3, indices);
}
auto unknowns = V32::compareBitMask((cache & (kUnknown << 24)) << 1);
auto passed = V32::compareBitMask(cache);
if (UNLIKELY(unknowns)) {
uint16_t bits = unknowns;
while (bits) {
int index = bits::getAndClearLastSetBit(bits);
int32_t value = input[i + index];
bool result;
if (value >= baseDictSize_) {
result = applyFilter(
super::filter_, valueInDictionary(value - baseDictSize_, true));
} else {
result =
applyFilter(super::filter_, valueInDictionary(value, false));
}
if (result) {
DictSuper::filterCache_[value] = FilterResult::kSuccess;
passed |= 1 << index;
} else {
DictSuper::filterCache_[value] = FilterResult::kFailure;
}
}
}
if (!passed) {
continue;
} else if (passed == (1 << V32::VSize) - 1) {
V32::store(
filterHits + numValues,
V32::load(
(scatter ? scatterRows : super::rows_) + super::rowIndex_ + i));
if (!filterOnly) {
V32::store(values + numValues, indices);
}
numValues += kWidth;
} else {
int8_t numBits = __builtin_popcount(passed);
auto setBits = V32::load(&V32::byteSetBits()[passed]);
simd::storePermute(
filterHits + numValues,
V32::load(
(scatter ? scatterRows : super::rows_) + super::rowIndex_ + i),
setBits);
if (!filterOnly) {
simd::storePermute(values + numValues, indices, setBits);
}
numValues += numBits;
}
}
super::rowIndex_ += numInput;
}