auto compact_theta_sketch_parser::parse()

in theta/include/compact_theta_sketch_parser_impl.hpp [36:134]


// Parses the preamble of a serialized compact theta sketch and returns a
// compact_theta_sketch_data describing it. The returned entries pointer (when
// not nullptr) points into the caller's buffer; nothing is copied here.
//
// ptr           - start of the serialized image
// size          - number of bytes available at ptr
// seed          - seed whose hash is compared with the seed hash stored in the
//                 image (serial version 1 reads no stored hash, so the computed
//                 hash is returned instead; an empty serial version 3 image is
//                 returned without the comparison)
// dump_on_error - forwarded unchanged to check_memory_size
//
// Throws std::invalid_argument for an unsupported serial version, for a serial
// version 2 preamble size other than 1, 2 or 3, and for a serial version 4
// entry-count width larger than 4 bytes.
// NOTE(review): check_memory_size and the checker<true> helpers are defined
// outside this block; they presumably throw when their check fails -- confirm.
//
// Every field is size-checked before it is read, and every size computation is
// done in size_t, because the image is untrusted input: a 32-bit wrap in an
// expected-size computation would let a tiny buffer pass the check while
// reporting a huge number of entries.
auto compact_theta_sketch_parser<dummy>::parse(const void* ptr, size_t size, uint64_t seed, bool dump_on_error) -> compact_theta_sketch_data {
  check_memory_size(ptr, size, 8, dump_on_error);
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(ptr);
  checker<true>::check_sketch_type(bytes[COMPACT_SKETCH_TYPE_BYTE], COMPACT_SKETCH_TYPE);
  const uint8_t serial_version = bytes[COMPACT_SKETCH_SERIAL_VERSION_BYTE];
  switch(serial_version) {
  case 4: {
    // version 4 sketches are ordered and always have entries (single item in exact mode is v3)
    const uint16_t seed_hash = reinterpret_cast<const uint16_t*>(ptr)[COMPACT_SKETCH_SEED_HASH_U16];
    checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
    const bool has_theta = bytes[COMPACT_SKETCH_PRE_LONGS_BYTE] > 1;
    uint64_t theta = theta_constants::MAX_THETA;
    if (has_theta) {
      check_memory_size(ptr, size, 16, dump_on_error);
      theta = reinterpret_cast<const uint64_t*>(ptr)[COMPACT_SKETCH_V4_THETA_U64];
    }
    const uint8_t num_entries_bytes = bytes[COMPACT_SKETCH_V4_NUM_ENTRIES_BYTES_BYTE];
    // The count is assembled into a uint32_t, so a wider encoding is malformed;
    // accepting it would also make the shift below undefined (count >= 32).
    if (num_entries_bytes > sizeof(uint32_t)) {
      throw std::invalid_argument("invalid number of bytes in num_entries: " + std::to_string(num_entries_bytes));
    }
    size_t data_offset_bytes = has_theta ? COMPACT_SKETCH_V4_PACKED_DATA_ESTIMATION_BYTE : COMPACT_SKETCH_V4_PACKED_DATA_EXACT_BYTE;
    check_memory_size(ptr, size, data_offset_bytes + num_entries_bytes, dump_on_error);
    // The entry count is stored least significant byte first.
    uint32_t num_entries = 0;
    const uint8_t* num_entries_ptr = bytes + data_offset_bytes;
    for (unsigned i = 0; i < num_entries_bytes; ++i) {
      // widen before shifting: a uint8_t would promote to signed int and overflow at bit 31
      num_entries |= static_cast<uint32_t>(*num_entries_ptr++) << (i << 3);
    }
    data_offset_bytes += num_entries_bytes;
    const uint8_t entry_bits = bytes[COMPACT_SKETCH_V4_ENTRY_BITS_BYTE];
    // multiply in size_t so the product cannot wrap in 32-bit arithmetic
    const size_t expected_bits = static_cast<size_t>(entry_bits) * static_cast<size_t>(num_entries);
    const size_t expected_size_bytes = data_offset_bytes + whole_bytes_to_hold_bits(expected_bits);
    check_memory_size(ptr, size, expected_size_bytes, dump_on_error);
    return {false, true, seed_hash, num_entries, theta, bytes + data_offset_bytes, entry_bits};
  }
  case 3: {
    uint64_t theta = theta_constants::MAX_THETA;
    const uint16_t seed_hash = reinterpret_cast<const uint16_t*>(ptr)[COMPACT_SKETCH_SEED_HASH_U16];
    const uint8_t flags = bytes[COMPACT_SKETCH_FLAGS_BYTE];
    // an empty sketch is returned before the seed hash is compared
    if (flags & (1 << COMPACT_SKETCH_IS_EMPTY_FLAG)) {
      return {true, true, seed_hash, 0, theta, nullptr, 64};
    }
    checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
    const uint8_t pre_longs = bytes[COMPACT_SKETCH_PRE_LONGS_BYTE];
    const bool has_theta = pre_longs > 2;
    if (has_theta) {
      check_memory_size(ptr, size, (COMPACT_SKETCH_THETA_U64 + 1) * sizeof(uint64_t), dump_on_error);
      theta = reinterpret_cast<const uint64_t*>(ptr)[COMPACT_SKETCH_THETA_U64];
    }
    if (pre_longs == 1) {
      // single entry: the one hash value is stored right after the first 8 bytes
      check_memory_size(ptr, size, 16, dump_on_error);
      return {false, true, seed_hash, 1, theta, reinterpret_cast<const uint64_t*>(ptr) + COMPACT_SKETCH_SINGLE_ENTRY_U64, 64};
    }
    // only the first 8 bytes are known to be present when there is no theta
    check_memory_size(ptr, size, (COMPACT_SKETCH_NUM_ENTRIES_U32 + 1) * sizeof(uint32_t), dump_on_error);
    const uint32_t num_entries = reinterpret_cast<const uint32_t*>(ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32];
    const size_t entries_start_u64 = has_theta ? COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 : COMPACT_SKETCH_ENTRIES_EXACT_U64;
    const uint64_t* entries = reinterpret_cast<const uint64_t*>(ptr) + entries_start_u64;
    const size_t expected_size_bytes = (entries_start_u64 + num_entries) * sizeof(uint64_t);
    check_memory_size(ptr, size, expected_size_bytes, dump_on_error);
    const bool is_ordered = flags & (1 << COMPACT_SKETCH_IS_ORDERED_FLAG);
    return {false, is_ordered, seed_hash, num_entries, theta, entries, 64};
  }
  case 1: {
    // no seed hash is read from a version 1 image; report the hash of the given seed
    const uint16_t seed_hash = compute_seed_hash(seed);
    // both fields lie beyond the 8 bytes checked so far
    check_memory_size(ptr, size, (COMPACT_SKETCH_NUM_ENTRIES_U32 + 1) * sizeof(uint32_t), dump_on_error);
    check_memory_size(ptr, size, (COMPACT_SKETCH_THETA_U64 + 1) * sizeof(uint64_t), dump_on_error);
    const uint32_t num_entries = reinterpret_cast<const uint32_t*>(ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32];
    const uint64_t theta = reinterpret_cast<const uint64_t*>(ptr)[COMPACT_SKETCH_THETA_U64];
    const bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
    if (is_empty) return {true, true, seed_hash, 0, theta, nullptr, 64};
    const uint64_t* entries = reinterpret_cast<const uint64_t*>(ptr) + COMPACT_SKETCH_ENTRIES_ESTIMATION_U64;
    const size_t expected_size_bytes = (static_cast<size_t>(COMPACT_SKETCH_ENTRIES_ESTIMATION_U64) + num_entries) * sizeof(uint64_t);
    check_memory_size(ptr, size, expected_size_bytes, dump_on_error);
    return {false, true, seed_hash, num_entries, theta, entries, 64};
  }
  case 2: {
    const uint8_t preamble_size = bytes[COMPACT_SKETCH_PRE_LONGS_BYTE];
    const uint16_t seed_hash = reinterpret_cast<const uint16_t*>(ptr)[COMPACT_SKETCH_SEED_HASH_U16];
    checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
    if (preamble_size == 1) {
      return {true, true, seed_hash, 0, theta_constants::MAX_THETA, nullptr, 64};
    } else if (preamble_size == 2) {
      check_memory_size(ptr, size, (COMPACT_SKETCH_NUM_ENTRIES_U32 + 1) * sizeof(uint32_t), dump_on_error);
      const uint32_t num_entries = reinterpret_cast<const uint32_t*>(ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32];
      if (num_entries == 0) {
        return {true, true, seed_hash, 0, theta_constants::MAX_THETA, nullptr, 64};
      } else {
        // preamble longs plus entries, 8 bytes each; widened so the sum cannot wrap in 32 bits
        const size_t expected_size_bytes = (static_cast<size_t>(preamble_size) + num_entries) << 3;
        check_memory_size(ptr, size, expected_size_bytes, dump_on_error);
        const uint64_t* entries = reinterpret_cast<const uint64_t*>(ptr) + COMPACT_SKETCH_ENTRIES_EXACT_U64;
        return {false, true, seed_hash, num_entries, theta_constants::MAX_THETA, entries, 64};
      }
    } else if (preamble_size == 3) {
      check_memory_size(ptr, size, (COMPACT_SKETCH_NUM_ENTRIES_U32 + 1) * sizeof(uint32_t), dump_on_error);
      check_memory_size(ptr, size, (COMPACT_SKETCH_THETA_U64 + 1) * sizeof(uint64_t), dump_on_error);
      const uint32_t num_entries = reinterpret_cast<const uint32_t*>(ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32];
      const uint64_t theta = reinterpret_cast<const uint64_t*>(ptr)[COMPACT_SKETCH_THETA_U64];
      const bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
      if (is_empty) return {true, true, seed_hash, 0, theta, nullptr, 64};
      const uint64_t* entries = reinterpret_cast<const uint64_t*>(ptr) + COMPACT_SKETCH_ENTRIES_ESTIMATION_U64;
      const size_t expected_size_bytes = (static_cast<size_t>(COMPACT_SKETCH_ENTRIES_ESTIMATION_U64) + num_entries) * sizeof(uint64_t);
      check_memory_size(ptr, size, expected_size_bytes, dump_on_error);
      return {false, true, seed_hash, num_entries, theta, entries, 64};
    } else {
      throw std::invalid_argument(std::to_string(preamble_size) + " longs of premable, but expected 1, 2, or 3");
    }
  }
  default:
    throw std::invalid_argument("unsupported serial version " + std::to_string(serial_version));
  }
}