in util/filter_bench.cc [342:587]
// Runs the whole benchmark and prints the results to stdout:
//   1. validates the flag combination,
//   2. builds filters until the memory or key budget is reached,
//   3. unless -quick or -best_case is set, checks that no added key is
//      rejected and that the measured false positive rate is tolerable,
//   4. times several mixes of inside/outside queries for each test mode.
//
// Throws std::runtime_error on an unsupported flag combination, or when the
// configured budget is too small for even one filter to be built.
void FilterBench::Go() {
  if (FLAGS_use_plain_table_bloom && FLAGS_use_full_block_reader) {
    throw std::runtime_error(
        "Can't combine -use_plain_table_bloom and -use_full_block_reader");
  }
  if (FLAGS_use_plain_table_bloom) {
    if (FLAGS_impl > 1) {
      throw std::runtime_error(
          "-impl must currently be >= 0 and <= 1 for Plain table");
    }
  } else {
    if (FLAGS_impl == 1) {
      throw std::runtime_error(
          "Block-based filter not currently supported by filter_bench");
    }
    if (FLAGS_impl > 3) {
      throw std::runtime_error(
          "-impl must currently be 0, 2, or 3 for Block-based table");
    }
  }

  if (FLAGS_vary_key_count_ratio < 0.0 || FLAGS_vary_key_count_ratio > 1.0) {
    throw std::runtime_error("-vary_key_count_ratio must be >= 0.0 and <= 1.0");
  }

  // For example, average_keys_per_filter = 100, vary_key_count_ratio = 0.1.
  // Varies up to +/- 10 keys. variance_range = 21 (generating value 0..20).
  // variance_offset = 10, so value - offset average value is always 0.
  const uint32_t variance_range =
      1 + 2 * static_cast<uint32_t>(FLAGS_vary_key_count_ratio *
                                    FLAGS_average_keys_per_filter);
  const uint32_t variance_offset = variance_range / 2;

  const std::vector<TestMode> &testModes =
      FLAGS_best_case ? bestCaseTestModes
                      : FLAGS_quick ? quickTestModes : allTestModes;

  // -quick and -best_case scale down the amount of query work (and, for
  // -best_case, the working set) relative to the flag values.
  m_queries_ = FLAGS_m_queries;
  double working_mem_size_mb = FLAGS_working_mem_size_mb;
  if (FLAGS_quick) {
    m_queries_ /= 7.0;
  } else if (FLAGS_best_case) {
    m_queries_ /= 3.0;
    working_mem_size_mb /= 10.0;
  }

  std::cout << "Building..." << std::endl;

  // Created lazily inside the loop; only used on the non-plain-table path.
  std::unique_ptr<BuiltinFilterBitsBuilder> builder;

  size_t total_memory_used = 0;
  size_t total_size = 0;
  size_t total_keys_added = 0;
#ifdef PREDICT_FP_RATE
  double weighted_predicted_fp_rate = 0.0;
#endif

  // A positive -m_keys_total_max bounds the build by key count; otherwise
  // the build is bounded by the total filter size in bytes.
  size_t max_total_keys;
  size_t max_mem;
  if (FLAGS_m_keys_total_max > 0) {
    max_total_keys = static_cast<size_t>(1000000 * FLAGS_m_keys_total_max);
    max_mem = SIZE_MAX;
  } else {
    max_total_keys = SIZE_MAX;
    max_mem = static_cast<size_t>(1024 * 1024 * working_mem_size_mb);
  }

  ROCKSDB_NAMESPACE::StopWatchNano timer(
      ROCKSDB_NAMESPACE::SystemClock::Default().get(), true);

  infos_.clear();
  // NOTE(review): when working_mem_size_mb is 0 and -m_keys_total_max is not
  // positive, neither limit below is effective (max_total_keys is SIZE_MAX)
  // -- confirm this is intended.
  while ((working_mem_size_mb == 0 || total_size < max_mem) &&
         total_keys_added < max_total_keys) {
    uint32_t filter_id = random_.Next();
    uint32_t keys_to_add = FLAGS_average_keys_per_filter +
                           FastRange32(random_.Next(), variance_range) -
                           variance_offset;
    // Do not overshoot the key budget with the last filter.
    if (max_total_keys - total_keys_added < keys_to_add) {
      keys_to_add = static_cast<uint32_t>(max_total_keys - total_keys_added);
    }
    infos_.emplace_back();
    FilterInfo &info = infos_.back();
    info.filter_id_ = filter_id;
    info.keys_added_ = keys_to_add;
    if (FLAGS_use_plain_table_bloom) {
      info.plain_table_bloom_.reset(new PlainTableBloomV1());
      info.plain_table_bloom_->SetTotalBits(
          &arena_, static_cast<uint32_t>(keys_to_add * FLAGS_bits_per_key),
          FLAGS_impl, 0 /*huge_page*/, nullptr /*logger*/);
      for (uint32_t i = 0; i < keys_to_add; ++i) {
        uint32_t hash = GetSliceHash(kms_[0].Get(filter_id, i));
        info.plain_table_bloom_->AddHash(hash);
      }
      info.filter_ = info.plain_table_bloom_->GetRawData();
    } else {
      if (!builder) {
        builder.reset(
            static_cast_with_check<BuiltinFilterBitsBuilder>(GetBuilder()));
      }
      for (uint32_t i = 0; i < keys_to_add; ++i) {
        builder->AddKey(kms_[0].Get(filter_id, i));
      }
      info.filter_ =
          builder->Finish(&info.owner_, &info.filter_construction_status);
      if (info.filter_construction_status.ok()) {
        info.filter_construction_status =
            builder->MaybePostVerify(info.filter_);
      }
      // A construction failure is reported but does not stop the benchmark.
      if (!info.filter_construction_status.ok()) {
        PrintError(info.filter_construction_status.ToString().c_str());
      }
#ifdef PREDICT_FP_RATE
      // Weight each filter's estimate by its key count so the final figure
      // is an average over keys.
      weighted_predicted_fp_rate +=
          keys_to_add *
          builder->EstimatedFpRate(keys_to_add, info.filter_.size());
#endif
      if (FLAGS_new_builder) {
        // Dropping the builder makes the next iteration create a fresh one.
        builder.reset();
      }
      info.reader_.reset(
          table_options_.filter_policy->GetFilterBitsReader(info.filter_));
      CachableEntry<ParsedFullFilterBlock> block(
          new ParsedFullFilterBlock(table_options_.filter_policy.get(),
                                    BlockContents(info.filter_)),
          nullptr /* cache */, nullptr /* cache_handle */,
          true /* own_value */);
      info.full_block_reader_.reset(
          new FullFilterBlockReader(table_.get(), std::move(block)));
    }
    total_size += info.filter_.size();
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
    total_memory_used +=
        malloc_usable_size(const_cast<char *>(info.filter_.data()));
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
    total_keys_added += keys_to_add;
  }
  uint64_t elapsed_nanos = timer.ElapsedNanos();

  if (infos_.empty()) {
    // The statistics below divide by the key and filter counts, and the
    // verification step converts a quotient by infos_.size() to uint32_t.
    // Fail with a clear message instead of producing inf/NaN output or an
    // out-of-range conversion.
    throw std::runtime_error(
        "No filters were built; -working_mem_size_mb or -m_keys_total_max is "
        "too small");
  }

  double ns = static_cast<double>(elapsed_nanos) / total_keys_added;
  std::cout << "Build avg ns/key: " << ns << std::endl;
  std::cout << "Number of filters: " << infos_.size() << std::endl;
  std::cout << "Total size (MB): " << total_size / 1024.0 / 1024.0 << std::endl;
  // Only non-zero when ROCKSDB_MALLOC_USABLE_SIZE is defined.
  if (total_memory_used > 0) {
    std::cout << "Reported total allocated memory (MB): "
              << total_memory_used / 1024.0 / 1024.0 << std::endl;
    std::cout << "Reported internal fragmentation: "
              << (total_memory_used - total_size) * 100.0 / total_size << "%"
              << std::endl;
  }

  double bpk = total_size * 8.0 / total_keys_added;
  std::cout << "Bits/key stored: " << bpk << std::endl;
#ifdef PREDICT_FP_RATE
  std::cout << "Predicted FP rate %: "
            << 100.0 * (weighted_predicted_fp_rate / total_keys_added)
            << std::endl;
#endif

  if (!FLAGS_quick && !FLAGS_best_case) {
    // Upper bound on the acceptable measured FP rate, derived from the
    // bits/key actually stored; enforced below unless -allow_bad_fp_rate.
    double tolerable_rate = std::pow(2.0, -(bpk - 1.0) / (1.4 + bpk / 50.0));
    std::cout << "Best possible FP rate %: " << 100.0 * std::pow(2.0, -bpk)
              << std::endl;
    std::cout << "Tolerable FP rate %: " << 100.0 * tolerable_rate << std::endl;

    std::cout << "----------------------------" << std::endl;
    std::cout << "Verifying..." << std::endl;

    // Number of not-added keys to probe per filter.
    uint32_t outside_q_per_f =
        static_cast<uint32_t>(m_queries_ * 1000000 / infos_.size());
    uint64_t fps = 0;
    for (FilterInfo &info : infos_) {
      // Every key that was added must be reported as possibly present.
      for (uint32_t j = 0; j < info.keys_added_; ++j) {
        if (FLAGS_use_plain_table_bloom) {
          uint32_t hash = GetSliceHash(kms_[0].Get(info.filter_id_, j));
          ALWAYS_ASSERT(info.plain_table_bloom_->MayContainHash(hash));
        } else {
          ALWAYS_ASSERT(
              info.reader_->MayMatch(kms_[0].Get(info.filter_id_, j)));
        }
      }
      // Probe key indices with the top bit set; these indices were not used
      // while building (assuming fewer than 2^31 keys per filter), so every
      // match is counted as a false positive.
      for (uint32_t j = 0; j < outside_q_per_f; ++j) {
        if (FLAGS_use_plain_table_bloom) {
          uint32_t hash =
              GetSliceHash(kms_[0].Get(info.filter_id_, j | 0x80000000));
          fps += info.plain_table_bloom_->MayContainHash(hash);
        } else {
          fps += info.reader_->MayMatch(
              kms_[0].Get(info.filter_id_, j | 0x80000000));
        }
      }
    }
    std::cout << " No FNs :)" << std::endl;
    double prelim_rate =
        static_cast<double>(fps) / outside_q_per_f / infos_.size();
    std::cout << " Prelim FP rate %: " << (100.0 * prelim_rate) << std::endl;

    if (!FLAGS_allow_bad_fp_rate) {
      ALWAYS_ASSERT(prelim_rate < tolerable_rate);
    }
  }

  // Runs every selected test mode twice from the same seed, once for real
  // and once as a dry run, and prints the difference between the two
  // results as the net cost per operation.
  const auto run_query_tests = [&](uint32_t inside_threshold,
                                   int seed_offset) {
    for (TestMode tm : testModes) {
      random_.Seed(FLAGS_seed + seed_offset);
      double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm);
      random_.Seed(FLAGS_seed + seed_offset);
      double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm);
      std::cout << " " << TestModeToString(tm) << " net ns/op: " << (f - d)
                << std::endl;
    }
  };

  std::cout << "----------------------------" << std::endl;
  std::cout << "Mixed inside/outside queries..." << std::endl;
  // 50% each inside and outside
  run_query_tests(UINT32_MAX / 2, 1);

  if (!FLAGS_quick) {
    std::cout << "----------------------------" << std::endl;
    std::cout << "Inside queries (mostly)..." << std::endl;
    // Do about 95% inside queries rather than 100% so that branch predictor
    // can't give itself an artificially crazy advantage.
    run_query_tests(UINT32_MAX / 20 * 19, 1);

    std::cout << "----------------------------" << std::endl;
    std::cout << "Outside queries (mostly)..." << std::endl;
    // Do about 95% outside queries rather than 100% so that branch predictor
    // can't give itself an artificially crazy advantage.
    run_query_tests(UINT32_MAX / 20, 2);
  }

  std::cout << fp_rate_report_.str();
  std::cout << "----------------------------" << std::endl;
  std::cout << "Done. (For more info, run with -legend or -help.)" << std::endl;
}