void KeyCompare::CompareColumnsToRows()

in cpp/src/arrow/compute/row/compare_internal.cc [332:470]


void KeyCompare::CompareColumnsToRows(
    uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null,
    const uint32_t* left_to_right_map, LightContext* ctx, uint32_t* out_num_rows,
    uint16_t* out_sel_left_maybe_same, const std::vector<KeyColumnArray>& cols,
    const RowTableImpl& rows, bool are_cols_in_encoding_order,
    uint8_t* out_match_bitvector_maybe_null) {
  if (num_rows_to_compare == 0) {
    if (out_match_bitvector_maybe_null) {
      DCHECK_EQ(out_num_rows, nullptr);
      DCHECK_EQ(out_sel_left_maybe_same, nullptr);
      bit_util::ClearBitmap(out_match_bitvector_maybe_null, 0, num_rows_to_compare);
    } else {
      *out_num_rows = 0;
    }
    return;
  }

  // Allocate temporary byte and bit vectors
  auto bytevector_A_holder =
      util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
  auto bytevector_B_holder =
      util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);
  auto bitvector_holder =
      util::TempVectorHolder<uint8_t>(ctx->stack, num_rows_to_compare);

  uint8_t* match_bytevector_A = bytevector_A_holder.mutable_data();
  uint8_t* match_bytevector_B = bytevector_B_holder.mutable_data();
  uint8_t* match_bitvector = bitvector_holder.mutable_data();

  bool is_first_column = true;
  for (size_t icol = 0; icol < cols.size(); ++icol) {
    const KeyColumnArray& col = cols[icol];

    if (col.metadata().is_null_type) {
      // If this null type col is the first column, the match_bytevector_A needs to be
      // initialized with 0xFF. Otherwise, the calculation can be skipped
      if (is_first_column) {
        std::memset(match_bytevector_A, 0xFF, num_rows_to_compare * sizeof(uint8_t));
      }
      continue;
    }

    uint32_t offset_within_row =
        rows.metadata().encoded_field_offset(ColIdInEncodingOrder(
            rows, static_cast<uint32_t>(icol), are_cols_in_encoding_order));
    if (col.metadata().is_fixed_length) {
      if (sel_left_maybe_null) {
        CompareBinaryColumnToRow<true>(
            offset_within_row, num_rows_to_compare, sel_left_maybe_null,
            left_to_right_map, ctx, col, rows,
            is_first_column ? match_bytevector_A : match_bytevector_B);
        NullUpdateColumnToRow<true>(
            static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
            left_to_right_map, ctx, col, rows, are_cols_in_encoding_order,
            is_first_column ? match_bytevector_A : match_bytevector_B);
      } else {
        // Version without using selection vector
        CompareBinaryColumnToRow<false>(
            offset_within_row, num_rows_to_compare, sel_left_maybe_null,
            left_to_right_map, ctx, col, rows,
            is_first_column ? match_bytevector_A : match_bytevector_B);
        NullUpdateColumnToRow<false>(
            static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
            left_to_right_map, ctx, col, rows, are_cols_in_encoding_order,
            is_first_column ? match_bytevector_A : match_bytevector_B);
      }
      if (!is_first_column) {
        AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B);
      }
      is_first_column = false;
    }
  }

  uint32_t ivarbinary = 0;
  for (size_t icol = 0; icol < cols.size(); ++icol) {
    const KeyColumnArray& col = cols[icol];
    if (!col.metadata().is_fixed_length) {
      // Process varbinary and nulls
      if (sel_left_maybe_null) {
        if (ivarbinary == 0) {
          CompareVarBinaryColumnToRow<true, true>(
              ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
              ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B);
        } else {
          CompareVarBinaryColumnToRow<true, false>(ivarbinary, num_rows_to_compare,
                                                   sel_left_maybe_null, left_to_right_map,
                                                   ctx, col, rows, match_bytevector_B);
        }
        NullUpdateColumnToRow<true>(
            static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
            left_to_right_map, ctx, col, rows, are_cols_in_encoding_order,
            is_first_column ? match_bytevector_A : match_bytevector_B);
      } else {
        if (ivarbinary == 0) {
          CompareVarBinaryColumnToRow<false, true>(
              ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
              ctx, col, rows, is_first_column ? match_bytevector_A : match_bytevector_B);
        } else {
          CompareVarBinaryColumnToRow<false, false>(
              ivarbinary, num_rows_to_compare, sel_left_maybe_null, left_to_right_map,
              ctx, col, rows, match_bytevector_B);
        }
        NullUpdateColumnToRow<false>(
            static_cast<uint32_t>(icol), num_rows_to_compare, sel_left_maybe_null,
            left_to_right_map, ctx, col, rows, are_cols_in_encoding_order,
            is_first_column ? match_bytevector_A : match_bytevector_B);
      }
      if (!is_first_column) {
        AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B);
      }
      is_first_column = false;
      ++ivarbinary;
    }
  }

  util::bit_util::bytes_to_bits(ctx->hardware_flags, num_rows_to_compare,
                                match_bytevector_A, match_bitvector);

  if (out_match_bitvector_maybe_null) {
    DCHECK_EQ(out_num_rows, nullptr);
    DCHECK_EQ(out_sel_left_maybe_same, nullptr);
    memcpy(out_match_bitvector_maybe_null, match_bitvector,
           bit_util::BytesForBits(num_rows_to_compare));
  } else {
    if (sel_left_maybe_null) {
      int out_num_rows_int;
      util::bit_util::bits_filter_indexes(0, ctx->hardware_flags, num_rows_to_compare,
                                          match_bitvector, sel_left_maybe_null,
                                          &out_num_rows_int, out_sel_left_maybe_same);
      *out_num_rows = out_num_rows_int;
    } else {
      int out_num_rows_int;
      util::bit_util::bits_to_indexes(0, ctx->hardware_flags, num_rows_to_compare,
                                      match_bitvector, &out_num_rows_int,
                                      out_sel_left_maybe_same);
      *out_num_rows = out_num_rows_int;
    }
  }
}