var_opt_sketch var_opt_sketch::deserialize()

in sampling/include/var_opt_sketch_impl.hpp [463:555]


/**
 * Reconstructs a sketch from a serialized image held in memory.
 *
 * Layout, as consumed below:
 *   byte 0      : preamble longs (low 6 bits) and resize factor (high 2 bits)
 *   byte 1      : serial version
 *   byte 2      : family id
 *   byte 3      : flags (EMPTY_FLAG_MASK, GADGET_FLAG_MASK)
 *   bytes 4-7   : k (uint32)
 *   -- remaining fields are read only when the empty flag is not set --
 *   uint64 n, uint32 h, uint32 r
 *   double total R-region weight (only if preamble longs == PREAMBLE_LONGS_FULL)
 *   h doubles   : H-region weights
 *   ceil(h / 8) bytes of bit-packed marks, least significant bit first (only if gadget flag set)
 *   h items followed by r items, decoded by the SerDe
 *
 * @param bytes     start of the serialized image
 * @param size      number of readable bytes at bytes
 * @param sd        SerDe used to decode the items
 * @param allocator allocator used for the weights, marks and items arrays
 * @return the reconstructed sketch
 * @throws std::invalid_argument if the R-region weight is inconsistent with r, or an H-region
 *         weight is not strictly positive. The validation helpers called here
 *         (ensure_minimum_memory, check_preamble_longs, check_family_and_serialization_version,
 *         validate_and_get_target_size, check_memory_size) are defined elsewhere and presumably
 *         throw on truncated or inconsistent input.
 */
var_opt_sketch<T, A> var_opt_sketch<T, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
  // the first 8 bytes (one preamble long) are read unconditionally
  ensure_minimum_memory(size, 8);
  const char* ptr = static_cast<const char*>(bytes);
  const char* base = ptr;
  const char* end_ptr = ptr + size;
  uint8_t first_byte;
  ptr += copy_from_mem(ptr, first_byte);
  const uint8_t preamble_longs = first_byte & 0x3f;
  const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
  uint8_t serial_version;
  ptr += copy_from_mem(ptr, serial_version);
  uint8_t family_id;
  ptr += copy_from_mem(ptr, family_id);
  uint8_t flags;
  ptr += copy_from_mem(ptr, flags);
  uint32_t k;
  ptr += copy_from_mem(ptr, k);

  check_preamble_longs(preamble_longs, flags);
  check_family_and_serialization_version(family_id, serial_version);
  // each preamble long is 8 bytes
  ensure_minimum_memory(size, preamble_longs << 3);

  const bool is_empty = flags & EMPTY_FLAG_MASK;
  const bool is_gadget = flags & GADGET_FLAG_MASK;

  if (is_empty) {
    return var_opt_sketch(k, rf, is_gadget, allocator);
  }

  // second and third prelongs
  uint64_t n;
  uint32_t h, r;
  ptr += copy_from_mem(ptr, n);
  ptr += copy_from_mem(ptr, h);
  ptr += copy_from_mem(ptr, r);

  const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);

  // validate R region weight (4th prelong), if present, before allocating anything
  double total_wt_r = 0.0;
  if (preamble_longs == PREAMBLE_LONGS_FULL) {
    ptr += copy_from_mem(ptr, total_wt_r);
    if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
      throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
       "Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
    }
  }

  // read the first h weights, fill in rest of array with -1.0
  check_memory_size(ptr - base + (h * sizeof(double)), size);
  std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
      weights_deleter(array_size, allocator));
  double* wts = weights.get(); // non-owning alias -- do not delete
  ptr += copy_from_mem(ptr, wts, h * sizeof(double));
  for (size_t i = 0; i < h; ++i) {
    // written as !(w > 0) so that NaN is rejected as well
    if (!(wts[i] > 0.0)) {
      throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
    }
  }
  std::fill(wts + h, wts + array_size, -1.0);

  // read the first h marks as packed bytes iff we have a gadget
  uint32_t num_marks_in_h = 0;
  std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size, allocator));
  if (is_gadget) {
    marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
    bool* mks = marks.get(); // non-owning alias -- do not delete
    const size_t size_marks = (h / 8) + (h % 8 > 0 ? 1 : 0);
    check_memory_size(ptr - base + size_marks, size);
    uint8_t val = 0;
    for (uint32_t i = 0; i < h; ++i) {
      if ((i & 0x7) == 0x0) { // start of a new packed byte; triggers on the first iteration
        ptr += copy_from_mem(ptr, val);
      }
      mks[i] = ((val >> (i & 0x7)) & 0x1) == 1;
      num_marks_in_h += (mks[i] ? 1 : 0);
    }
    // Entries past h are not part of the serialized image. Give them a defined value,
    // mirroring the padding of the weights array, so no later read sees an indeterminate bool.
    std::fill(mks + h, mks + array_size, false);
  }

  // read the sample items, skipping the gap. Either h or r may be 0
  items_deleter deleter(array_size, allocator);
  std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);

  ptr += sd.deserialize(ptr, end_ptr - ptr, items.get(), h);
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid

  // R-region items start one slot after the H region (index h is the gap). Only form that
  // address when there is something to read: with r == 0 and h == array_size the pointer
  // would lie beyond one-past-the-end of the allocation.
  if (r > 0) {
    ptr += sd.deserialize(ptr, end_ptr - ptr, items.get() + h + 1, r);
  }
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid

  return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
                        std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
}