auto quantiles_sketch::deserialize()

in quantiles/include/quantiles_sketch_impl.hpp [474:570]


auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& serde,
    const C& comparator, const A &allocator) -> quantiles_sketch {
  // Rebuilds a sketch from a serialized image held in memory.
  //
  // bytes      - start of the serialized image
  // size       - number of readable bytes at `bytes`
  // serde      - item (de)serializer; deserialize(ptr, capacity, T* dst, n) is expected to
  //              construct n items in the uninitialized storage at dst and return the
  //              number of bytes consumed
  // comparator - forwarded to the resulting sketch
  // allocator  - forwarded to the resulting sketch and to every buffer created here
  //
  // Layout as read below: a fixed header (preamble longs, serial version, family id, flags,
  // k, two unused bytes); then, unless the EMPTY flag is set, the item count, the min item,
  // the max item, one extra unused 64-bit word for serial version 1 only, the base buffer,
  // and finally one block of k items for every set bit of the level bit pattern.
  //
  // Malformed input is rejected by the ensure_*/check_* helpers and by the serde; how they
  // report failure is defined by those helpers, not here.
  ensure_minimum_memory(size, 8);
  const char* ptr = static_cast<const char*>(bytes);
  const char* end_ptr = static_cast<const char*>(bytes) + size;

  uint8_t preamble_longs;
  ptr += copy_from_mem(ptr, preamble_longs);
  uint8_t serial_version;
  ptr += copy_from_mem(ptr, serial_version);
  uint8_t family_id;
  ptr += copy_from_mem(ptr, family_id);
  uint8_t flags_byte;
  ptr += copy_from_mem(ptr, flags_byte);
  uint16_t k;
  ptr += copy_from_mem(ptr, k);
  uint16_t unused;
  ptr += copy_from_mem(ptr, unused);

  check_k(k);
  check_serial_version(serial_version); // a little redundant with the header check
  check_family_id(family_id);
  check_header_validity(preamble_longs, flags_byte, serial_version);

  // an empty sketch carries nothing beyond the header
  const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
  if (is_empty) {
    return quantiles_sketch(k, comparator, allocator);
  }

  ensure_minimum_memory(size, 16);
  uint64_t items_seen;
  ptr += copy_from_mem(ptr, items_seen);

  // serial version 2 is treated as compact regardless of the flag
  const bool is_compact = (serial_version == 2) || ((flags_byte & (1 << flags::IS_COMPACT)) > 0);
  const bool is_sorted = (flags_byte & (1 << flags::IS_SORTED)) > 0;

  optional<T> min_item;
  optional<T> max_item;

  // Scratch space the serde constructs min and max into. Raw storage is used instead of an
  // empty optional<T>: dereferencing a disengaged std::optional is undefined behavior.
  alignas(T) unsigned char tmp_storage[sizeof(T)];
  T* const tmp = reinterpret_cast<T*>(tmp_storage);

  // Destroys the scratch item on scope exit, so it is released even if emplace() throws.
  struct item_destroyer {
    T* item;
    ~item_destroyer() { item->~T(); }
  };

  ptr += serde.deserialize(ptr, end_ptr - ptr, tmp, 1);
  {
    // serde call did not throw, so *tmp is a live object: repackage and clean up
    item_destroyer cleanup{tmp};
    min_item.emplace(std::move(*tmp));
  }
  ptr += serde.deserialize(ptr, end_ptr - ptr, tmp, 1);
  {
    // serde call did not throw, so *tmp is a live object: repackage and clean up
    item_destroyer cleanup{tmp};
    max_item.emplace(std::move(*tmp));
  }

  if (serial_version == 1) {
    uint64_t unused_long;
    ptr += copy_from_mem(ptr, unused_long); // no longer used
  }

  // allocate buffers as needed
  const uint8_t levels_needed = compute_levels_needed(k, items_seen);
  const uint64_t bit_pattern = compute_bit_pattern(k, items_seen);

  // Java provides a compact storage layout for a sketch of primitive doubles. The C++ version
  // does not currently operate sketches in compact mode, but will only serialize as compact
  // to avoid complications around serialization of empty values for generic type T. We also need
  // to be able to ingest either serialized format from Java.

  // load base buffer
  const uint32_t bb_items = compute_base_buffer_items(k, items_seen);
  const uint32_t items_to_read = (levels_needed == 0 || is_compact) ? bb_items : 2 * k;
  auto base_buffer_pair = deserialize_array(ptr, end_ptr - ptr, bb_items, 2 * k, serde, allocator);
  ptr += base_buffer_pair.second;
  if (items_to_read > bb_items) { // either equal or greater, never read fewer items
    // read remaining items, only use to advance the pointer
    const uint32_t extra_items = items_to_read - bb_items;
    auto extras = deserialize_array(ptr, end_ptr - ptr, extra_items, extra_items, serde, allocator);
    ptr += extras.second;
  }

  // populate vector of Levels directly: a set bit means the level's k items are present in
  // the image, a clear bit yields an empty level with capacity reserved for k items
  VectorLevels levels(allocator);
  levels.reserve(levels_needed);
  uint64_t working_pattern = bit_pattern;
  for (uint8_t i = 0; i < levels_needed; ++i, working_pattern >>= 1) {
    if ((working_pattern & 0x01) == 1) {
      auto pair = deserialize_array(ptr, end_ptr - ptr, k, k, serde, allocator);
      ptr += pair.second;
      levels.push_back(std::move(pair.first));
    } else {
      Level level(allocator);
      level.reserve(k);
      levels.push_back(std::move(level));
    }
  }

  return quantiles_sketch(k, items_seen, bit_pattern,
    std::move(base_buffer_pair.first), std::move(levels), std::move(min_item), std::move(max_item), is_sorted,
    comparator, allocator);
}