in cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc [238:372]
void Exec() {
if (filter_.type->id() == Type::RUN_END_ENCODED) {
return ExecREEFilter();
}
const auto* filter_is_valid = filter_.buffers[0].data;
const auto* filter_data = filter_.buffers[1].data;
const auto filter_offset = filter_.offset;
if (filter_.null_count == 0 && values_null_count_ == 0) {
// Fast filter when values and filter are not null
::arrow::internal::VisitSetBitRunsVoid(
filter_data, filter_.offset, values_length_,
[&](int64_t position, int64_t length) { WriteValueSegment(position, length); });
return;
}
// Bit counters used for both null_selection behaviors
DropNullCounter drop_null_counter(filter_is_valid, filter_data, filter_offset,
values_length_);
OptionalBitBlockCounter data_counter(values_is_valid_, values_offset_,
values_length_);
OptionalBitBlockCounter filter_valid_counter(filter_is_valid, filter_offset,
values_length_);
auto WriteNotNull = [&](int64_t index) {
bit_util::SetBit(out_is_valid_, out_position_);
// Increments out_position_
WriteValue(index);
};
auto WriteMaybeNull = [&](int64_t index) {
bit_util::SetBitTo(out_is_valid_, out_position_,
bit_util::GetBit(values_is_valid_, values_offset_ + index));
// Increments out_position_
WriteValue(index);
};
int64_t in_position = 0;
while (in_position < values_length_) {
BitBlockCount filter_block = drop_null_counter.NextBlock();
BitBlockCount filter_valid_block = filter_valid_counter.NextWord();
BitBlockCount data_block = data_counter.NextWord();
if (filter_block.AllSet() && data_block.AllSet()) {
// Fastest path: all values in block are included and not null
bit_util::SetBitsTo(out_is_valid_, out_position_, filter_block.length, true);
WriteValueSegment(in_position, filter_block.length);
in_position += filter_block.length;
} else if (filter_block.AllSet()) {
// Faster: all values are selected, but some values are null
// Batch copy bits from values validity bitmap to output validity bitmap
CopyBitmap(values_is_valid_, values_offset_ + in_position, filter_block.length,
out_is_valid_, out_position_);
WriteValueSegment(in_position, filter_block.length);
in_position += filter_block.length;
} else if (filter_block.NoneSet() && null_selection_ == FilterOptions::DROP) {
// For this exceedingly common case in low-selectivity filters we can
// skip further analysis of the data and move on to the next block.
in_position += filter_block.length;
} else {
// Some filter values are false or null
if (data_block.AllSet()) {
// No values are null
if (filter_valid_block.AllSet()) {
// Filter is non-null but some values are false
for (int64_t i = 0; i < filter_block.length; ++i) {
if (bit_util::GetBit(filter_data, filter_offset + in_position)) {
WriteNotNull(in_position);
}
++in_position;
}
} else if (null_selection_ == FilterOptions::DROP) {
// If any values are selected, they ARE NOT null
for (int64_t i = 0; i < filter_block.length; ++i) {
if (bit_util::GetBit(filter_is_valid, filter_offset + in_position) &&
bit_util::GetBit(filter_data, filter_offset + in_position)) {
WriteNotNull(in_position);
}
++in_position;
}
} else { // null_selection == FilterOptions::EMIT_NULL
// Data values in this block are not null
for (int64_t i = 0; i < filter_block.length; ++i) {
const bool is_valid =
bit_util::GetBit(filter_is_valid, filter_offset + in_position);
if (is_valid &&
bit_util::GetBit(filter_data, filter_offset + in_position)) {
// Filter slot is non-null and set
WriteNotNull(in_position);
} else if (!is_valid) {
// Filter slot is null, so we have a null in the output
bit_util::ClearBit(out_is_valid_, out_position_);
WriteNull();
}
++in_position;
}
}
} else { // !data_block.AllSet()
// Some values are null
if (filter_valid_block.AllSet()) {
// Filter is non-null but some values are false
for (int64_t i = 0; i < filter_block.length; ++i) {
if (bit_util::GetBit(filter_data, filter_offset + in_position)) {
WriteMaybeNull(in_position);
}
++in_position;
}
} else if (null_selection_ == FilterOptions::DROP) {
// If any values are selected, they ARE NOT null
for (int64_t i = 0; i < filter_block.length; ++i) {
if (bit_util::GetBit(filter_is_valid, filter_offset + in_position) &&
bit_util::GetBit(filter_data, filter_offset + in_position)) {
WriteMaybeNull(in_position);
}
++in_position;
}
} else { // null_selection == FilterOptions::EMIT_NULL
// Data values in this block are not null
for (int64_t i = 0; i < filter_block.length; ++i) {
const bool is_valid =
bit_util::GetBit(filter_is_valid, filter_offset + in_position);
if (is_valid &&
bit_util::GetBit(filter_data, filter_offset + in_position)) {
// Filter slot is non-null and set
WriteMaybeNull(in_position);
} else if (!is_valid) {
// Filter slot is null, so we have a null in the output
bit_util::ClearBit(out_is_valid_, out_position_);
WriteNull();
}
++in_position;
}
}
}
} // !filter_block.AllSet()
} // while(in_position < values_length_)
}