in quantiles/include/quantiles_sketch_impl.hpp [474:570]
auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& serde,
    const C& comparator, const A &allocator) -> quantiles_sketch {
  // Rebuilds a sketch from a serialized image held in memory.
  //
  // bytes      - start of the serialized image
  // size       - number of readable bytes at `bytes`
  // serde      - item (de)serializer; serde.deserialize(ptr, capacity, items, n) is expected to
  //              return the number of bytes it consumed (its return value is added to the cursor)
  // comparator - comparator handed to the resulting sketch
  // allocator  - allocator handed to the resulting sketch and to every buffer built here
  //
  // Layout consumed, in order: an 8-byte header (preamble longs, serial version, family id, flags,
  // 16-bit k, 16 unused bits); then, unless the EMPTY flag is set, a 64-bit item count, the min
  // item, the max item, an extra unused 64-bit field for serial version 1 only, the base buffer,
  // and finally one array of k items for every set bit of the level bit pattern.
  //
  // NOTE(review): ensure_minimum_memory and the check_* helpers are defined elsewhere; they are
  // presumed to throw when the image is too short or the header is invalid.
  ensure_minimum_memory(size, 8);
  const char* ptr = static_cast<const char*>(bytes);
  const char* end_ptr = ptr + size;

  // fixed 8-byte header
  uint8_t preamble_longs;
  ptr += copy_from_mem(ptr, preamble_longs);
  uint8_t serial_version;
  ptr += copy_from_mem(ptr, serial_version);
  uint8_t family_id;
  ptr += copy_from_mem(ptr, family_id);
  uint8_t flags_byte;
  ptr += copy_from_mem(ptr, flags_byte);
  uint16_t k;
  ptr += copy_from_mem(ptr, k);
  uint16_t unused;
  ptr += copy_from_mem(ptr, unused); // read only to advance past the unused header bytes

  check_k(k);
  check_serial_version(serial_version); // a little redundant with the header check
  check_family_id(family_id);
  check_header_validity(preamble_longs, flags_byte, serial_version);

  // an empty sketch carries nothing beyond the header
  const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
  if (is_empty) {
    return quantiles_sketch(k, comparator, allocator);
  }

  ensure_minimum_memory(size, 16);
  uint64_t items_seen;
  ptr += copy_from_mem(ptr, items_seen);

  // serial version 2 is treated as compact regardless of the flag
  const bool is_compact = (serial_version == 2) || ((flags_byte & (1 << flags::IS_COMPACT)) > 0);
  const bool is_sorted = (flags_byte & (1 << flags::IS_SORTED)) > 0;

  optional<T> tmp; // space to deserialize min and max
  optional<T> min_item;
  optional<T> max_item;

  // The serde constructs the item in tmp's raw storage. Once the call has returned without
  // throwing, the item is moved into its final optional and the temporary is destroyed by hand,
  // since tmp itself never becomes engaged.
  ptr += serde.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
  min_item.emplace(std::move(*tmp));
  (*tmp).~T();
  ptr += serde.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
  max_item.emplace(std::move(*tmp));
  (*tmp).~T();

  if (serial_version == 1) {
    // Serial version 1 stores one more 64-bit field here that is no longer used. Verify it is
    // really present before reading: a truncated image would otherwise be read past its end and
    // leave the cursor beyond end_ptr, which would turn every later "end_ptr - ptr" limit into a
    // huge unsigned value.
    const auto remaining = end_ptr - ptr;
    ensure_minimum_memory(remaining > 0 ? static_cast<size_t>(remaining) : 0, sizeof(uint64_t));
    uint64_t unused_long;
    ptr += copy_from_mem(ptr, unused_long); // no longer used
  }

  // allocate buffers as needed
  const uint8_t levels_needed = compute_levels_needed(k, items_seen);
  const uint64_t bit_pattern = compute_bit_pattern(k, items_seen);

  // Java provides a compact storage layout for a sketch of primitive doubles. The C++ version
  // does not currently operate sketches in compact mode, but will only serialize as compact
  // to avoid complications around serialization of empty values for generic type T. We also need
  // to be able to ingest either serialized format from Java.

  // load base buffer: bb_items items are kept, in a buffer requested with 2 * k as its size argument
  const uint32_t bb_items = compute_base_buffer_items(k, items_seen);
  const uint32_t items_to_read = (levels_needed == 0 || is_compact) ? bb_items : 2 * k;
  auto base_buffer_pair = deserialize_array(ptr, end_ptr - ptr, bb_items, 2 * k, serde, allocator);
  ptr += base_buffer_pair.second;
  if (items_to_read > bb_items) { // either equal or greater, never read fewer items
    // a non-compact image stores a full 2 * k base buffer: read the remaining items only to
    // advance the cursor, then discard them
    const uint32_t extra_items = items_to_read - bb_items;
    auto extras = deserialize_array(ptr, end_ptr - ptr, extra_items, extra_items, serde, allocator);
    ptr += extras.second;
  }

  // populate vector of Levels directly: a set bit in the pattern means that level is stored in
  // the image, a clear bit yields an empty level with room reserved for k items
  VectorLevels levels(allocator);
  levels.reserve(levels_needed);
  uint64_t working_pattern = bit_pattern;
  for (size_t i = 0; i < levels_needed; ++i, working_pattern >>= 1) {
    if ((working_pattern & 0x01) == 1) {
      auto pair = deserialize_array(ptr, end_ptr - ptr, k, k, serde, allocator);
      ptr += pair.second;
      levels.push_back(std::move(pair.first));
    } else {
      Level level(allocator);
      level.reserve(k);
      levels.push_back(std::move(level));
    }
  }

  return quantiles_sketch(k, items_seen, bit_pattern,
    std::move(base_buffer_pair.first), std::move(levels), std::move(min_item), std::move(max_item), is_sorted,
    comparator, allocator);
}