var_opt_sketch var_opt_sketch::deserialize()

in sampling/include/var_opt_sketch_impl.hpp [559:640]


/**
 * Reconstructs a var_opt_sketch from its serialized form on an input stream.
 *
 * Data is consumed in this order: an 8-byte preamble (preamble-longs count in the
 * low 6 bits and resize factor in the high 2 bits of the first byte, then serial
 * version, family id, flags, and k); for a non-empty sketch, n, h, and r; the total
 * R-region weight when the preamble count equals PREAMBLE_LONGS_FULL; h weights;
 * bit-packed marks for the first h items when the gadget flag is set; and finally
 * h items followed by r items, both decoded through sd.
 *
 * @param is stream to read the serialized sketch from
 * @param sd SerDe used to decode the sample items
 * @param allocator allocator used for the weight, mark, and item arrays and handed to the sketch
 * @return the deserialized sketch
 * @throws std::runtime_error if the stream is not in a good state after a read
 * @throws std::invalid_argument if the R-region weight or any of the first h weights is invalid
 *         (NOTE(review): the check_* / validate_* helpers presumably also throw on a bad header -- confirm)
 */
var_opt_sketch<T, A> var_opt_sketch<T, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
  // Values obtained from a failed read cannot be trusted, so the stream state is
  // confirmed before anything freshly read is validated, used as a size, or inspected.
  const auto ensure_stream_ok = [&is]() {
    if (!is.good())
      throw std::runtime_error("error reading from std::istream");
  };

  // first prelong
  const auto first_byte = read<uint8_t>(is);
  uint8_t preamble_longs = first_byte & 0x3f;
  const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
  const auto serial_version = read<uint8_t>(is);
  const auto family_id = read<uint8_t>(is);
  const auto flags = read<uint8_t>(is);
  const auto k = read<uint32_t>(is);
  ensure_stream_ok();

  check_preamble_longs(preamble_longs, flags);
  check_family_and_serialization_version(family_id, serial_version);

  const bool is_empty = flags & EMPTY_FLAG_MASK;
  const bool is_gadget = flags & GADGET_FLAG_MASK;

  if (is_empty) {
    // nothing beyond the first prelong is read for an empty sketch
    return var_opt_sketch(k, rf, is_gadget, allocator);
  }

  // second and third prelongs
  const auto n = read<uint64_t>(is);
  const auto h = read<uint32_t>(is);
  const auto r = read<uint32_t>(is);
  ensure_stream_ok();

  const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);

  // validate R region weight (4th prelong), if needed, before allocating
  double total_wt_r = 0.0;
  if (preamble_longs == PREAMBLE_LONGS_FULL) {
    total_wt_r = read<double>(is);
    ensure_stream_ok();
    if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
      throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
       "Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
    }
  }

  // read the first h weights, fill remainder with -1.0
  std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
      weights_deleter(array_size, allocator));
  double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
  read(is, wts, h * sizeof(double));
  ensure_stream_ok(); // do not inspect the buffer unless the read succeeded
  for (size_t i = 0; i < h; ++i) {
    // negated comparison so that NaN is rejected as well
    if (!(wts[i] > 0.0)) {
      throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
    }
  }
  std::fill(wts + h, wts + array_size, -1.0);

  // read the first h marks as packed bytes iff we have a gadget
  uint32_t num_marks_in_h = 0;
  std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size, allocator));
  if (is_gadget) {
    marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
    bool* mks = marks.get(); // non-owning alias -- do not delete
    uint8_t val = 0;
    for (uint32_t i = 0; i < h; ++i) {
      if ((i & 0x7) == 0x0) { // a new packed byte every 8 marks; triggers on first iteration
        val = read<uint8_t>(is);
        ensure_stream_ok();
      }
      mks[i] = ((val >> (i & 0x7)) & 0x1) == 1;
      num_marks_in_h += (mks[i] ? 1 : 0);
    }
    // as with the weights, leave no slot past h uninitialized
    std::fill(mks + h, mks + array_size, false);
  }

  // read the sample items, skipping the gap. Either h or r may be 0
  items_deleter deleter(array_size, allocator);
  std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);

  sd.deserialize(is, items.get(), h); // aka &data_[0]
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid

  // Only form the address of slot h + 1 when there are R items to place there:
  // with r == 0 that slot could lie beyond the end of the allocation.
  if (r > 0) {
    sd.deserialize(is, items.get() + h + 1, r);
  }
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid

  ensure_stream_ok();

  return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
                        std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
}