in cpp/src/arrow/compute/row/compare_internal.cc [332:470]
void KeyCompare::CompareColumnsToRows(
uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
const uint32_t* left_to_right_map, LightContext* ctx, uint32_t* out_num_rows,
uint16_t* out_sel_left_maybe_same, const std::vector<KeyColumnArray>& cols,
const RowTableImpl& rows, bool are_cols_in_encoding_order,
uint8_t* out_match_bitvector_maybe_null) {
if (num_rows_to_compare == 0) {
if (out_match_bitvector_maybe_null) {
DCHECK_EQ(out_num_rows, nullptr);
DCHECK_EQ(out_sel_left_maybe_same, nullptr);
bit_util::ClearBitmap(out_match_bitvector_maybe_null, 0, num_rows_to_compare);
} else {
*out_num_rows = 0;
}
return;
}
// Allocate temporary byte and bit vectors
auto bytevector_A_holder =
util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
auto bytevector_B_holder =
util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
auto bitvector_holder =
util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
uint8_t* match_bytevector_A = bytevector_A_holder.mutable_data();
uint8_t* match_bytevector_B = bytevector_B_holder.mutable_data();
uint8_t* match_bitvector = bitvector_holder.mutable_data();
bool is_first_column = true;
for (size_t icol = 0; icol < cols.size(); ++icol) {
const KeyColumnArray& col = cols[icol];
if (col.metadata().is_null_type) {
// If this null type col is the first column, the match_bytevector_A needs to be
// initialized with 0xFF. Otherwise, the calculation can be skipped
if (is_first_column) {
std::memset(match_bytevector_A, 0xFF, num_rows_to_compare * sizeof(uint8_t));
}
continue;
}
uint32_t offset_within_row =
rows.metadata().encoded_field_offset(ColIdInEncodingOrder(
rows, static_cast<uint32_t>(icol), are_cols_in_encoding_order));
if (col.metadata().is_fixed_length) {
if (sel_left_maybe_null) {
CompareBinaryColumnToRow<true>(
offset_within_row, num_rows_to_compare, sel_left_maybe_null,
left_to_right_map, ctx, col, rows,
is_first_column ? match_bytevector_A : match_bytevector_B);
NullUpdateColumnToRow<true>(
static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
left_to_right_map, ctx, col, rows, are_cols_in_encoding_order,
is_first_column ? match_bytevector_A : match_bytevector_B);
} else {
// Version without using selection vector
CompareBinaryColumnToRow<false>(
offset_within_row, num_rows_to_compare, sel_left_maybe_null,
left_to_right_map, ctx, col, rows,
is_first_column ? match_bytevector_A : match_bytevector_B);
NullUpdateColumnToRow<false>(
static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
left_to_right_map, ctx, col, rows, are_cols_in_encoding_order,
is_first_column ? match_bytevector_A : match_bytevector_B);
}
if (!is_first_column) {
AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B);
}
is_first_column = false;
}
}
uint32_t ivarbinary = 0;
for (size_t icol = 0; icol < cols.size(); ++icol) {
const KeyColumnArray& col = cols[icol];
if (!col.metadata().is_fixed_length) {
// Process varbinary and nulls
if (sel_left_maybe_null) {
if (ivarbinary == 0) {
CompareVarBinaryColumnToRow<true, true>(
ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B);
} else {
CompareVarBinaryColumnToRow<true, false>(ivarbinary, num_rows_to_compare,
sel_left_maybe_null, left_to_right_map,
ctx, col, rows, match_bytevector_B);
}
NullUpdateColumnToRow<true>(
static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
left_to_right_map, ctx, col, rows, are_cols_in_encoding_order,
is_first_column ? match_bytevector_A : match_bytevector_B);
} else {
if (ivarbinary == 0) {
CompareVarBinaryColumnToRow<false, true>(
ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B);
} else {
CompareVarBinaryColumnToRow<false, false>(
ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
ctx, col, rows, match_bytevector_B);
}
NullUpdateColumnToRow<false>(
static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
left_to_right_map, ctx, col, rows, are_cols_in_encoding_order,
is_first_column ? match_bytevector_A : match_bytevector_B);
}
if (!is_first_column) {
AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B);
}
is_first_column = false;
++ivarbinary;
}
}
util::bit_util::bytes_to_bits(ctx->hardware_flags, num_rows_to_compare,
match_bytevector_A, match_bitvector);
if (out_match_bitvector_maybe_null) {
DCHECK_EQ(out_num_rows, nullptr);
DCHECK_EQ(out_sel_left_maybe_same, nullptr);
memcpy(out_match_bitvector_maybe_null, match_bitvector,
bit_util::BytesForBits(num_rows_to_compare));
} else {
if (sel_left_maybe_null) {
int out_num_rows_int;
util::bit_util::bits_filter_indexes(0, ctx->hardware_flags, num_rows_to_compare,
match_bitvector, sel_left_maybe_null,
&out_num_rows_int, out_sel_left_maybe_same);
*out_num_rows = out_num_rows_int;
} else {
int out_num_rows_int;
util::bit_util::bits_to_indexes(0, ctx->hardware_flags, num_rows_to_compare,
match_bitvector, &out_num_rows_int,
out_sel_left_maybe_same);
*out_num_rows = out_num_rows_int;
}
}
}