kll_helper::compress_result kll_helper::general_compress()

in kll/include/kll_helper_impl.hpp [206:294]


kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m, uint8_t num_levels_in, T* items,
        uint32_t* in_levels, uint32_t* out_levels, bool is_level_zero_sorted)
{
  if (num_levels_in == 0) throw std::invalid_argument("num_levels_in == 0"); // things are too weird if zero levels are allowed
  const uint32_t starting_item_count = in_levels[num_levels_in] - in_levels[0];
  uint8_t current_num_levels = num_levels_in;
  uint32_t current_item_count = starting_item_count; // decreases with each compaction
  uint32_t target_item_count = compute_total_capacity(k, m, current_num_levels); // increases if we add levels
  bool done_yet = false;
  out_levels[0] = 0;
  uint8_t current_level = 0;
  while (!done_yet) {

    // If we are at the current top level, add an empty level above it for convenience,
    // but do not increment num_levels until later
    if (current_level == (current_num_levels - 1)) {
      in_levels[current_level + 2] = in_levels[current_level + 1];
    }

    const auto raw_beg = in_levels[current_level];
    const auto raw_lim = in_levels[current_level + 1];
    const auto raw_pop = raw_lim - raw_beg;

    if ((current_item_count < target_item_count) || (raw_pop < level_capacity(k, current_num_levels, current_level, m))) {
      // move level over as is
      // make sure we are not moving data upwards
      if (raw_beg < out_levels[current_level]) throw std::logic_error("wrong move");
      std::move(items + raw_beg, items + raw_lim, items + out_levels[current_level]);
      out_levels[current_level + 1] = out_levels[current_level] + raw_pop;
    } else {
      // The sketch is too full AND this level is too full, so we compact it
      // Note: this can add a level and thus change the sketches capacities

      const auto pop_above = in_levels[current_level + 2] - raw_lim;
      const bool odd_pop = is_odd(raw_pop);
      const auto adj_beg = odd_pop ? 1 + raw_beg : raw_beg;
      const auto adj_pop = odd_pop ? raw_pop - 1 : raw_pop;
      const auto half_adj_pop = adj_pop / 2;

      if (odd_pop) { // move one guy over
        items[out_levels[current_level]] = std::move(items[raw_beg]);
        out_levels[current_level + 1] = out_levels[current_level] + 1;
      } else { // even number of items
        out_levels[current_level + 1] = out_levels[current_level];
      }

      // level zero might not be sorted, so we must sort it if we wish to compact it
      if ((current_level == 0) && !is_level_zero_sorted) {
        std::sort(items + adj_beg, items + adj_beg + adj_pop, C());
      }

      if (pop_above == 0) { // Level above is empty, so halve up
        randomly_halve_up(items, adj_beg, adj_pop);
      } else { // Level above is nonempty, so halve down, then merge up
        randomly_halve_down(items, adj_beg, adj_pop);
        merge_sorted_arrays<T, C>(items, adj_beg, half_adj_pop, raw_lim, pop_above, adj_beg + half_adj_pop);
      }

      // track the fact that we just eliminated some data
      current_item_count -= half_adj_pop;

      // adjust the boundaries of the level above
      in_levels[current_level + 1] = in_levels[current_level + 1] - half_adj_pop;

      // increment num_levels if we just compacted the old top level
      // this creates some more capacity (the size of the new bottom level)
      if (current_level == (current_num_levels - 1)) {
        current_num_levels++;
        target_item_count += level_capacity(k, current_num_levels, 0, m);
      }

    } // end of code for compacting a level

    // determine whether we have processed all levels yet (including any new levels that we created)

    if (current_level == (current_num_levels - 1)) done_yet = true;
    current_level++;
  } // end of loop over levels

  if ((out_levels[current_num_levels] - out_levels[0]) != current_item_count) throw std::logic_error("inconsistent state");

  for (uint32_t i = current_item_count; i < starting_item_count; i++) items[i].~T();

  compress_result result;
  result.final_num_levels = current_num_levels;
  result.final_capacity = target_item_count;
  result.final_num_items = current_item_count;
  return result;
}