auto quantiles_sketch::deserialize()

in quantiles/include/quantiles_sketch_impl.hpp [371:451]


auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde,
    const C& comparator, const A &allocator) -> quantiles_sketch {
  // Rebuilds a sketch from its serialized image on the given stream.
  //
  // Fields are consumed in this order: preamble longs (uint8), serial version (uint8),
  // family id (uint8), flags (uint8), k (uint16), 2 unused bytes. Unless the IS_EMPTY flag is set,
  // these are followed by the item count (uint64), the min and max items (read through serde),
  // one extra uint64 that is skipped when the serial version is 1, the base buffer, and one
  // array of k items for every bit set in the level bit pattern.
  //
  // is:         stream positioned at the first byte of the serialized sketch
  // serde:      reads items of type T from the stream
  // comparator: forwarded to the resulting sketch
  // allocator:  forwarded to the resulting sketch and to the buffers built here
  //
  // Returns an empty sketch of size k when the IS_EMPTY flag is set, otherwise a sketch built
  // from the deserialized buffers.
  // Throws std::runtime_error if the stream is not good() after the header or after the
  // item count / min / max section. The check_* helpers, serde and deserialize_array are
  // defined elsewhere and presumably throw on invalid input as well.
  const auto preamble_longs = read<uint8_t>(is);
  const auto serial_version = read<uint8_t>(is);
  const auto family_id = read<uint8_t>(is);
  const auto flags_byte = read<uint8_t>(is);
  const auto k = read<uint16_t>(is);
  read<uint16_t>(is); // unused

  check_k(k);
  check_serial_version(serial_version); // a little redundant with the header check
  check_family_id(family_id);
  check_header_validity(preamble_longs, flags_byte, serial_version);

  if (!is.good()) throw std::runtime_error("error reading from std::istream");
  const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
  if (is_empty) {
    return quantiles_sketch(k, comparator, allocator);
  }

  const auto items_seen = read<uint64_t>(is);
  // every buffer size below is derived from items_seen, so do not trust it after a failed read
  if (!is.good()) throw std::runtime_error("error reading from std::istream");

  // serial version 2 is always treated as compact, regardless of the flag
  const bool is_compact = (serial_version == 2) || ((flags_byte & (1 << flags::IS_COMPACT)) > 0);
  const bool is_sorted = (flags_byte & (1 << flags::IS_SORTED)) > 0;

  // tmp is used only as storage for serde to build an item in; the item is then moved into
  // its final optional and destroyed here by hand
  optional<T> tmp;
  optional<T> min_item;
  optional<T> max_item;

  serde.deserialize(is, &*tmp, 1);
  // serde call did not throw, repackage and cleanup
  min_item.emplace(std::move(*tmp));
  (*tmp).~T();
  serde.deserialize(is, &*tmp, 1);
  // serde call did not throw, repackage and cleanup
  max_item.emplace(std::move(*tmp));
  (*tmp).~T();

  if (serial_version == 1) {
    read<uint64_t>(is); // no longer used
  }

  // fail before allocating anything if the fixed-layout section could not be read completely
  if (!is.good()) throw std::runtime_error("error reading from std::istream");

  // allocate buffers as needed
  const uint8_t levels_needed = compute_levels_needed(k, items_seen);
  const uint64_t bit_pattern = compute_bit_pattern(k, items_seen);

  // Java provides a compact storage layout for a sketch of primitive doubles. The C++ version
  // does not currently operate sketches in compact mode, but will only serialize as compact
  // to avoid complications around serialization of empty values for generic type T. We also need
  // to be able to ingest either serialized format from Java.

  // load base buffer
  const uint32_t bb_items = compute_base_buffer_items(k, items_seen);
  // a non-compact image with at least one level stores a full 2k-slot base buffer
  const uint32_t items_to_read = (levels_needed == 0 || is_compact) ? bb_items : 2 * k;
  Level base_buffer = deserialize_array(is, bb_items, 2 * k, serde, allocator);
  if (items_to_read > bb_items) { // either equal or greater, never read fewer items
    // consume the remaining slots to advance the stream; the result is discarded
    deserialize_array(is, items_to_read - bb_items, items_to_read - bb_items, serde, allocator);
  }

  // populate vector of Levels directly: bit i of the pattern says whether level i holds k items
  VectorLevels levels(allocator);
  levels.reserve(levels_needed);
  uint64_t working_pattern = bit_pattern;
  for (size_t i = 0; i < levels_needed; ++i, working_pattern >>= 1) {
    if ((working_pattern & 0x01) == 1) {
      levels.push_back(deserialize_array(is, k, k, serde, allocator));
    } else {
      // level not present in the stream: keep an empty placeholder with room for k items
      Level level(allocator);
      level.reserve(k);
      levels.push_back(std::move(level));
    }
  }

  return quantiles_sketch(k, items_seen, bit_pattern,
    std::move(base_buffer), std::move(levels), std::move(min_item), std::move(max_item), is_sorted,
    comparator, allocator);
}