CS theta_set_difference_base::compute()

in theta/include/theta_set_difference_base_impl.hpp [39:82]


CS theta_set_difference_base<EN, EK, CS, A>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const {
  // Computes the difference "a without b": collects the entries of a whose key is below
  // min(theta of a, theta of b) and, when b retains entries, whose key is not found in b.
  //
  //   a       - first operand; its entries are passed on through forward_begin / forward_end /
  //             conditional_forward (presumably moved when a is an rvalue -- confirm in the helpers)
  //   b       - second operand; only iterated and queried
  //   ordered - when true, the collected entries are sorted with comparator() unless a is
  //             already ordered
  //
  // Throws std::invalid_argument when the seed hash of a or b differs from seed_hash_.
  // The two shortcuts below return before those seed hash checks are reached.

  // Shortcuts: the result is built directly from a.
  if (a.is_empty()) return CS(a, ordered);
  if (a.get_num_retained() > 0 && b.is_empty()) return CS(a, ordered);

  if (seed_hash_ != a.get_seed_hash()) throw std::invalid_argument("A seed hash mismatch");
  if (seed_hash_ != b.get_seed_hash()) throw std::invalid_argument("B seed hash mismatch");

  using below_theta = key_less_than<uint64_t, EN, EK>;

  const uint64_t theta = std::min(a.get_theta64(), b.get_theta64());
  std::vector<EN, A> kept(allocator_);
  bool result_is_empty = a.is_empty();

  if (b.get_num_retained() == 0) {
    // Nothing to subtract: keep every entry of a that passes the theta filter.
    std::copy_if(
        forward_begin(std::forward<FwdSketch>(a)),
        forward_end(std::forward<FwdSketch>(a)),
        std::back_inserter(kept),
        below_theta(theta));
  } else if (a.is_ordered() && b.is_ordered()) {
    // Sort-based: both inputs are ordered, so std::set_difference can be applied directly;
    // the conditional inserter applies the theta filter to what it receives.
    std::set_difference(
        forward_begin(std::forward<FwdSketch>(a)),
        forward_end(std::forward<FwdSketch>(a)),
        b.begin(),
        b.end(),
        conditional_back_inserter(kept, below_theta(theta)),
        comparator());
  } else {
    // Hash-based: load the keys of b that are below theta into a table, then probe it with a.
    const uint8_t lg_table_size = lg_size_from_count(b.get_num_retained(), hash_table::REBUILD_THRESHOLD);
    hash_table b_keys(lg_table_size, lg_table_size, hash_table::resize_factor::X1, 1, 0, 0, allocator_); // theta and seed are not used here
    for (const auto& b_entry: b) {
      const uint64_t key = EK()(b_entry);
      if (key >= theta) {
        if (b.is_ordered()) break; // early stop: no need to look further in an ordered b
        continue;
      }
      b_keys.insert(b_keys.find(key).first, key);
    }

    // Scan a and keep the entries whose key is below theta and absent from the table.
    for (auto&& a_entry: a) {
      const uint64_t key = EK()(a_entry);
      if (key >= theta) {
        if (a.is_ordered()) break; // early stop: no need to look further in an ordered a
        continue;
      }
      if (!b_keys.find(key).second) kept.push_back(conditional_forward<FwdSketch>(a_entry));
    }
  }

  // No entries kept and theta still at its maximum: report the result as empty.
  result_is_empty = result_is_empty || (kept.empty() && theta == theta_constants::MAX_THETA);

  const bool a_is_ordered = a.is_ordered();
  if (ordered && !a_is_ordered) std::sort(kept.begin(), kept.end(), comparator());
  return CS(result_is_empty, a_is_ordered || ordered, seed_hash_, theta, std::move(kept));
}