in velox/expression/SimpleFunctionAdapter.h [299:412]
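// Iterates over the selected rows, dispatching each row to the appropriate
// doApply* variant and writing the result either directly into the flat
// result vector (fast path) or through applyUdf.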
void iterate(
ApplyContext& applyContext,
bool allNotNull,
const TReader&... readers) const {
// If is_default_contains_nulls_behavior, we return null if the inputs
// contain any nulls.
// If !is_default_contains_nulls_behavior, we don't invoke callNullFree
// when the inputs contain any nulls, but rather invoke call or
// callNullable as usual.
bool callNullFree = FUNC::is_default_contains_nulls_behavior ||
(FUNC::udf_has_callNullFree && !applyContext.mayHaveNullsRecursive);
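// E.g. for a function with default contains-nulls behavior, a row whose
// input contains a null at any nesting level produces a null result
// without the function being invoked at all.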
// Iterate the rows.
if constexpr (fastPathIteration) {
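// Fast path: write results directly into the flat result vector's raw
// values (and raw nulls) buffers instead of going through applyUdf.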
uint64_t* nullBuffer = nullptr;
auto* data = applyContext.result->mutableRawValues();
auto writeResult = [&applyContext, &nullBuffer, &data](
auto row, bool notNull, auto out) INLINE_LAMBDA {
// For fast path iteration, all active rows were already set as non-null
// beforehand, so we only need to update the null buffer if the function
// returned null (which is not the common case).
if (notNull) {
if constexpr (return_type_traits::typeKind == TypeKind::BOOLEAN) {
bits::setBit(data, row, out);
} else {
data[row] = out;
}
} else {
if (!nullBuffer) {
nullBuffer = applyContext.result->mutableRawNulls();
}
bits::setNull(nullBuffer, row);
}
};
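// writeResult is shared by all of the fast-path branches below; only the
// way each row's value is computed differs.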
if (callNullFree) {
// This results in some code duplication, but applying this check once
// per batch instead of once per row shows a significant performance
// improvement when there are no nulls.
if (applyContext.mayHaveNullsRecursive) {
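// Some input may contain a null, possibly nested inside a complex type,
// so check each row before taking the null-free path.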
applyContext.applyToSelectedNoThrow([&](auto row) INLINE_LAMBDA {
typename return_type_traits::NativeType out{};
auto containsNull = (readers.containsNull(row) || ...);
bool notNull;
if (containsNull) {
// Result is NULL because the input contains NULL.
notNull = false;
} else {
notNull = doApplyNullFree<0>(row, out, readers...);
}
writeResult(row, notNull, out);
});
} else {
applyContext.applyToSelectedNoThrow([&](auto row) INLINE_LAMBDA {
typename return_type_traits::NativeType out{};
bool notNull = doApplyNullFree<0>(row, out, readers...);
writeResult(row, notNull, out);
});
}
} else if (allNotNull) {
applyContext.applyToSelectedNoThrow([&](auto row) INLINE_LAMBDA {
// Passing a stack variable has been shown to boost the performance of
// functions that repeatedly update the output.
// The opposite optimization (eliminating the temp) is easier for the
// compiler to do (assuming the function call is inlined).
typename return_type_traits::NativeType out{};
bool notNull = doApplyNotNull<0>(row, out, readers...);
writeResult(row, notNull, out);
});
} else {
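// Some inputs may be null; doApply handles the per-argument null checks.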
applyContext.applyToSelectedNoThrow([&](auto row) INLINE_LAMBDA {
typename return_type_traits::NativeType out{};
bool notNull = doApply<0>(row, out, readers...);
writeResult(row, notNull, out);
});
}
} else {
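// Non-fast-path iteration: results are written through applyUdf, which
// passes an output writer (`out`) to the per-row lambdas below.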
if (callNullFree) {
// This results in some code duplication, but applying this check once
// per batch instead of once per row shows a significant performance
// improvement when there are no nulls.
if (applyContext.mayHaveNullsRecursive) {
applyUdf(applyContext, [&](auto& out, auto row) INLINE_LAMBDA {
auto containsNull = (readers.containsNull(row) || ...);
if (containsNull) {
// Result is NULL because the input contains NULL.
return false;
}
return doApplyNullFree<0>(row, out, readers...);
});
} else {
applyUdf(applyContext, [&](auto& out, auto row) INLINE_LAMBDA {
return doApplyNullFree<0>(row, out, readers...);
});
}
} else if (allNotNull) {
if (applyContext.allAscii) {
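// All string inputs are ASCII, so the ASCII-specialized variant of the
// function can be used.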
applyUdf(applyContext, [&](auto& out, auto row) INLINE_LAMBDA {
return doApplyAsciiNotNull<0>(row, out, readers...);
});
} else {
applyUdf(applyContext, [&](auto& out, auto row) INLINE_LAMBDA {
return doApplyNotNull<0>(row, out, readers...);
});
}
} else {
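// Some inputs may be null; fall back to doApply, which handles the
// per-argument null checks.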
applyUdf(applyContext, [&](auto& out, auto row) INLINE_LAMBDA {
return doApply<0>(row, out, readers...);
});
}
}
}