in cpp/src/frequent_items_sketch_timing_profile.cpp [35:136]
// Timing profile for frequent_longs_sketch.
//
// Sweeps the stream length from 2^lg_min_stream_len up to 2^lg_max_stream_len
// (the next length is supplied by pwr_2_law_next with ppo as its first argument).
// For every length it runs get_num_trials(...) trials; each trial constructs a
// sketch, feeds it stream_length values drawn from the Zipf sampler, and
// round-trips it through both the stream-based and the byte-based serializers.
//
// One tab-separated row is written to std::cout per stream length. Every timing
// column is in nanoseconds averaged over the trials; the Update column is
// additionally divided by the stream length, i.e. it is the cost per update.
void frequent_items_sketch_timing_profile::run() {
  const unsigned lg_min_stream_len = 0;
  const unsigned lg_max_stream_len = 23;
  const unsigned ppo = 16;
  const unsigned lg_max_trials = 14;
  const unsigned lg_min_trials = 8;
  const unsigned lg_max_sketch_size = 10;
  const unsigned zipf_lg_range = 13; // range: 8K values for 1K sketch
  const double zipf_exponent = 0.7;

  using timer_clock = std::chrono::high_resolution_clock;
  using nanos = std::chrono::nanoseconds;

  zipf_distribution zipf(1 << zipf_lg_range, zipf_exponent);

  std::cout << "StreamLen\tTrials\tBuild\tUpdate\tSerStream\tDeserStream\tSerBytes\tDeserBytes\tMaxErr\tNumItems\tSizeBytes" << std::endl;

  // Shift in size_t so that the bound has the same type as stream_length and
  // does not depend on the width of int.
  const size_t max_stream_length = static_cast<size_t>(1) << lg_max_stream_len;
  size_t stream_length = static_cast<size_t>(1) << lg_min_stream_len;

  while (stream_length <= max_stream_length) {
    // accumulators over all trials at this stream length
    nanos build_time_ns(0);
    nanos update_time_ns(0);
    nanos stream_serialize_time_ns(0);
    nanos stream_deserialize_time_ns(0);
    nanos bytes_serialize_time_ns(0);
    nanos bytes_deserialize_time_ns(0);
    unsigned num_items = 0;
    size_t size_bytes = 0;
    size_t max_error = 0;

    const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials);

    // One input buffer per stream length, refilled by every trial.
    // NOTE(review): this raw buffer is not released if anything below throws;
    // a std::vector would be, but needs <vector> in this file's include list.
    long long* values = new long long[stream_length];

    for (size_t trial = 0; trial < num_trials; ++trial) {
      const auto start_build = timer_clock::now();
      frequent_longs_sketch sketch(lg_max_sketch_size);
      const auto finish_build = timer_clock::now();
      build_time_ns += std::chrono::duration_cast<nanos>(finish_build - start_build);

      // prepare values to exclude cost of random generator from the update loop
      for (size_t j = 0; j < stream_length; ++j) {
        values[j] = zipf.sample();
      }

      const auto start_update = timer_clock::now();
      for (size_t j = 0; j < stream_length; ++j) {
        sketch.update(values[j]);
      }
      const auto finish_update = timer_clock::now();
      update_time_ns += std::chrono::duration_cast<nanos>(finish_update - start_update);

      // stream-based round trip; the scope ends after the last timestamp, so
      // destruction of the stream and the deserialized sketch is not timed
      {
        std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
        const auto start_stream_serialize = timer_clock::now();
        sketch.serialize(s);
        const auto finish_stream_serialize = timer_clock::now();
        stream_serialize_time_ns += std::chrono::duration_cast<nanos>(finish_stream_serialize - start_stream_serialize);

        const auto start_stream_deserialize = timer_clock::now();
        auto deserialized_sketch = frequent_longs_sketch::deserialize(s);
        const auto finish_stream_deserialize = timer_clock::now();
        stream_deserialize_time_ns += std::chrono::duration_cast<nanos>(finish_stream_deserialize - start_stream_deserialize);

        // the put position is the number of bytes the serializer wrote
        size_bytes += s.tellp();
      }

      // byte-buffer round trip
      {
        const auto start_bytes_serialize = timer_clock::now();
        auto bytes = sketch.serialize();
        const auto finish_bytes_serialize = timer_clock::now();
        bytes_serialize_time_ns += std::chrono::duration_cast<nanos>(finish_bytes_serialize - start_bytes_serialize);

        const auto start_bytes_deserialize = timer_clock::now();
        auto deserialized_sketch = frequent_longs_sketch::deserialize(bytes.data(), bytes.size());
        const auto finish_bytes_deserialize = timer_clock::now();
        bytes_deserialize_time_ns += std::chrono::duration_cast<nanos>(finish_bytes_deserialize - start_bytes_deserialize);
      }

      num_items += sketch.get_num_active_items();
      max_error += sketch.get_maximum_error();
    }

    delete [] values;

    // std::endl is kept on purpose: each row is flushed as soon as it is ready
    std::cout << stream_length << "\t"
        << num_trials << "\t"
        << static_cast<double>(build_time_ns.count()) / num_trials << "\t"
        << static_cast<double>(update_time_ns.count()) / num_trials / stream_length << "\t"
        << static_cast<double>(stream_serialize_time_ns.count()) / num_trials << "\t"
        << static_cast<double>(stream_deserialize_time_ns.count()) / num_trials << "\t"
        << static_cast<double>(bytes_serialize_time_ns.count()) / num_trials << "\t"
        << static_cast<double>(bytes_deserialize_time_ns.count()) / num_trials << "\t"
        << static_cast<double>(max_error) / num_trials << "\t"
        << static_cast<double>(num_items) / num_trials << "\t"
        << static_cast<double>(size_bytes) / num_trials << "\t"
        << std::endl;

    stream_length = pwr_2_law_next(ppo, stream_length);
  }
}