in velox/dwio/dwrf/common/DecoderUtil.h [178:384]
// Reads fixed-width values of type T for 'rows' by handing buffer traversal to
// loopOverBuffers and, for every value or batch of values, either storing it
// into 'voidValues', passing it to 'hook', or testing it against 'filter'.
//
// T, filterOnly, scatter, TFilter and THook are compile-time parameters
// declared ahead of this signature.
//
// rows: row numbers to read. rows[i] is used as the position of the value in
//   the data (see 'buffer + firstRow - rowOffset' below).
// scatterRows: when 'scatter' is true, scatterRows[i] is the destination row
//   for the value read for rows[i]. Also the row numbers handed to 'hook'.
// voidValues: output buffer, accessed as an array of T.
// filterHits: when a filter is present, receives the target row of each value
//   that passes the filter.
// numValues: in/out running count. Used as the write cursor into the output
//   in the non-scatter paths and into 'filterHits' in the filter paths.
// input, bufferStart, bufferEnd: forwarded unchanged to loopOverBuffers.
//   Presumably the source stream and the current window of buffered bytes;
//   confirm against loopOverBuffers.
// filter: filter to apply; common::AlwaysTrue means no filtering.
// hook: value hook; NoHook means values go to 'voidValues' instead.
void fixedWidthScan(
folly::Range<const int32_t*> rows,
const int32_t* scatterRows,
void* voidValues,
int32_t* filterHits,
int32_t& numValues,
SeekableInputStream& input,
const char*& bufferStart,
const char*& bufferEnd,
TFilter& filter,
THook& hook) {
// Presumably the number of T lanes in one SIMD vector; confirm against
// simd::Vectors.
constexpr int32_t kWidth = simd::Vectors<T>::VSize;
constexpr bool is16 = sizeof(T) == 2;
// Number of rows consumed by one full rowLoop step: 16 for 2-byte T, else 8.
constexpr int32_t kStep = is16 ? 16 : 8;
constexpr bool hasFilter = !std::is_same<TFilter, common::AlwaysTrue>::value;
constexpr bool hasHook = !std::is_same<THook, NoHook>::value;
auto rawValues = reinterpret_cast<T*>(voidValues);
loopOverBuffers<T>(
rows,
0,
input,
bufferStart,
bufferEnd,
// Callback for a single value at position 'rowIndex' of 'rows'.
[&](T value, int32_t rowIndex) {
if (!hasFilter) {
if (hasHook) {
hook.addValue(scatterRows[rowIndex], &value);
} else {
// Without scatter the value is written at its index in 'rows'.
auto targetRow = scatter ? scatterRows[rowIndex] : rowIndex;
rawValues[targetRow] = value;
}
++numValues;
} else {
if (common::applyFilter(filter, value)) {
auto targetRow = scatter ? scatterRows[rowIndex] : rows[rowIndex];
filterHits[numValues++] = targetRow;
if (!filterOnly) {
// Passing values are stored densely, parallel to 'filterHits'.
rawValues[numValues - 1] = value;
}
}
}
},
// Callback for a run of 'numRowsInBuffer' rows starting at 'rowIndex' whose
// values are read from 'buffer'. The parameter 'rows' shadows the outer
// 'rows'. 'rowOffset' is subtracted from a row number to index 'buffer'.
[&](const int32_t* rows,
int32_t rowIndex,
int32_t numRowsInBuffer,
int32_t rowOffset,
const T* buffer) {
rowLoop(
rows,
rowIndex,
rowIndex + numRowsInBuffer,
kStep,
// Contiguous case: the kStep values starting at rows[rowIndex] are read
// from consecutive positions in 'buffer'.
[&](int32_t rowIndex) {
auto firstRow = rows[rowIndex];
if (!hasFilter) {
if (hasHook) {
hook.addValues(
scatterRows + rowIndex,
buffer + firstRow - rowOffset,
kStep,
sizeof(T));
} else {
if (scatter) {
scatterDense(
buffer + firstRow - rowOffset,
scatterRows + rowIndex,
kStep,
rawValues);
} else {
simd::memcpy(
rawValues + numValues,
buffer + firstRow - rowOffset,
sizeof(T) * kStep);
}
}
// NOTE(review): here numValues advances for the hook path as well, while
// the two gather callbacks below do not advance it for hooks. Confirm
// whether callers rely on numValues when there is no filter.
numValues += kStep;
} else {
// One filter pass per SIMD vector; kStep / kWidth vectors cover the step.
for (auto step = 0; step < kStep / kWidth; ++step) {
auto values =
simd::Vectors<T>::load(buffer + firstRow - rowOffset);
processFixedFilter<T, filterOnly, scatter, true>(
reinterpret_cast<__m256i>(values),
kWidth,
firstRow,
filter,
// Supplies the row numbers to record for passing values. Captures
// 'rowIndex' by reference, so it sees the current vector's position.
// NOTE(review): 8 is presumably the number of 32-bit row numbers per
// index vector - confirm against loadGather32Indices.
[&](int32_t offset) {
return simd::Vectors<T>::loadGather32Indices(
(scatter ? scatterRows : rows) + rowIndex +
8 * offset);
},
rawValues,
filterHits,
numValues);
firstRow += kWidth;
rowIndex += kWidth;
}
}
},
// Gather case for a full step: values are fetched from 'buffer' at the
// positions given by 'rows' rather than from consecutive positions.
[&](int32_t rowIndex) {
for (auto step = 0; step < kStep / kWidth; ++step) {
auto indices =
simd::Vectors<T>::loadGather32Indices(rows + rowIndex);
__m256i values;
if (is16) {
// 2-byte values use a dedicated gather taking the row pointer and a
// count of 16.
values = reinterpret_cast<__m256i>(simd::gather16x32(
buffer - rowOffset, rows + rowIndex, 16));
} else {
values = reinterpret_cast<__m256i>(
simd::Vectors<T>::gather32(buffer - rowOffset, indices));
}
if (!hasFilter) {
if (hasHook) {
hook.addValues(
scatterRows + rowIndex, &values, kWidth, sizeof(T));
} else {
if (scatter) {
scatterDense<T>(
&values, scatterRows + rowIndex, kWidth, rawValues);
} else {
simd::Vectors<int64_t>::store(
rawValues + numValues, values);
}
numValues += kWidth;
}
} else {
processFixedFilter<T, filterOnly, scatter, false>(
values,
kWidth,
-1,
filter,
[&](int32_t offset) {
if (offset) {
return simd::Vectors<T>::loadGather32Indices(
(scatter ? scatterRows : rows) + rowIndex +
8 * offset);
}
// For offset 0 without scatter, reuse the indices already loaded above.
return scatter ? simd::Vectors<T>::loadGather32Indices(
scatterRows + rowIndex)
: indices;
},
rawValues,
filterHits,
numValues);
}
rowIndex += kWidth;
}
},
// Gather case for 'numRows' rows, processed in chunks of at most kWidth.
// Presumably the remainder shorter than a full step; confirm against
// rowLoop.
[&](int32_t rowIndex, int32_t numRows) {
int32_t step = 0;
while (step < numRows) {
__m256i values;
int width = std::min<int32_t>(kWidth, numRows - step);
if (is16) {
// NOTE(review): passes 'numRows' rather than 'width'. The two are equal
// only if this loop runs once for 2-byte T - confirm numRows < kStep here.
values = reinterpret_cast<__m256i>(simd::gather16x32(
buffer - rowOffset, rows + rowIndex, numRows));
} else {
auto indices =
simd::Vectors<T>::loadGather32Indices(rows + rowIndex);
if (width < kWidth) {
// Partial vector: masked gather with a zero vector as the source
// operand and a mask built from 'width'.
values = reinterpret_cast<__m256i>(
simd::Vectors<T>::maskGather32(
simd::Vectors<T>::setAll(0),
simd::Vectors<T>::leadingMask(width),
buffer - rowOffset,
indices));
} else {
values =
reinterpret_cast<__m256i>(simd::Vectors<T>::gather32(
buffer - rowOffset, indices));
}
}
if (!hasFilter) {
if (hasHook) {
hook.addValues(
scatterRows + rowIndex, &values, width, sizeof(T));
} else {
if (scatter) {
scatterDense<T>(
&values, scatterRows + rowIndex, width, rawValues);
} else {
// NOTE(review): this appears to store a whole vector even when
// width < kWidth - confirm that 'rawValues' has room past the last value.
simd::Vectors<int64_t>::store(
rawValues + numValues, values);
// NOTE(review): unlike the full-step gather above, numValues is advanced
// only in this non-scatter branch - confirm this is intended.
numValues += width;
}
}
} else {
processFixedFilter<T, filterOnly, scatter, false>(
values,
width,
-1,
filter,
[&](int32_t offset) {
return simd::Vectors<T>::loadGather32Indices(
(scatter ? scatterRows : rows) + rowIndex +
8 * offset);
},
rawValues,
filterHits,
numValues);
}
rowIndex += width;
step += width;
}
});
});
}