in util/filter_bench.cc [589:787]
double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
TestMode mode) {
for (auto &info : infos_) {
info.outside_queries_ = 0;
info.false_positives_ = 0;
}
auto dry_run_hash_fn = DryRunNoHash;
if (!FLAGS_net_includes_hashing) {
if (FLAGS_impl < 2 || FLAGS_use_plain_table_bloom) {
dry_run_hash_fn = DryRunHash32;
} else {
dry_run_hash_fn = DryRunHash64;
}
}
uint32_t num_infos = static_cast<uint32_t>(infos_.size());
uint32_t dry_run_hash = 0;
uint64_t max_queries = static_cast<uint64_t>(m_queries_ * 1000000 + 0.50);
// Some filters may be considered secondary in order to implement skewed
// queries. num_primary_filters is the number that are to be treated as
// equal, and any remainder will be treated as secondary.
uint32_t num_primary_filters = num_infos;
// The proportion (when divided by 2^32 - 1) of filter queries going to
// the primary filters (default = all). The remainder of queries are
// against secondary filters.
uint32_t primary_filter_threshold = 0xffffffff;
if (mode == kSingleFilter) {
// 100% of queries to 1 filter
num_primary_filters = 1;
} else if (mode == kFiftyOneFilter) {
if (num_infos < 50) {
return 0.0; // skip
}
// 50% of queries
primary_filter_threshold /= 2;
// to 1% of filters
num_primary_filters = (num_primary_filters + 99) / 100;
} else if (mode == kEightyTwentyFilter) {
if (num_infos < 5) {
return 0.0; // skip
}
// 80% of queries
primary_filter_threshold = primary_filter_threshold / 5 * 4;
// to 20% of filters
num_primary_filters = (num_primary_filters + 4) / 5;
} else if (mode == kRandomFilter) {
if (num_infos == 1) {
return 0.0; // skip
}
}
uint32_t batch_size = 1;
std::unique_ptr<Slice[]> batch_slices;
std::unique_ptr<Slice *[]> batch_slice_ptrs;
std::unique_ptr<bool[]> batch_results;
if (mode == kBatchPrepared || mode == kBatchUnprepared) {
batch_size = static_cast<uint32_t>(kms_.size());
}
batch_slices.reset(new Slice[batch_size]);
batch_slice_ptrs.reset(new Slice *[batch_size]);
batch_results.reset(new bool[batch_size]);
for (uint32_t i = 0; i < batch_size; ++i) {
batch_results[i] = false;
batch_slice_ptrs[i] = &batch_slices[i];
}
ROCKSDB_NAMESPACE::StopWatchNano timer(
ROCKSDB_NAMESPACE::SystemClock::Default().get(), true);
for (uint64_t q = 0; q < max_queries; q += batch_size) {
bool inside_this_time = random_.Next() <= inside_threshold;
uint32_t filter_index;
if (random_.Next() <= primary_filter_threshold) {
filter_index = random_.Uniformish(num_primary_filters);
} else {
// secondary
filter_index = num_primary_filters +
random_.Uniformish(num_infos - num_primary_filters);
}
FilterInfo &info = infos_[filter_index];
for (uint32_t i = 0; i < batch_size; ++i) {
if (inside_this_time) {
batch_slices[i] =
kms_[i].Get(info.filter_id_, random_.Uniformish(info.keys_added_));
} else {
batch_slices[i] =
kms_[i].Get(info.filter_id_, random_.Uniformish(info.keys_added_) |
uint32_t{0x80000000});
info.outside_queries_++;
}
}
// TODO: implement batched interface to full block reader
// TODO: implement batched interface to plain table bloom
if (mode == kBatchPrepared && !FLAGS_use_full_block_reader &&
!FLAGS_use_plain_table_bloom) {
for (uint32_t i = 0; i < batch_size; ++i) {
batch_results[i] = false;
}
if (dry_run) {
for (uint32_t i = 0; i < batch_size; ++i) {
batch_results[i] = true;
dry_run_hash += dry_run_hash_fn(batch_slices[i]);
}
} else {
info.reader_->MayMatch(batch_size, batch_slice_ptrs.get(),
batch_results.get());
}
for (uint32_t i = 0; i < batch_size; ++i) {
if (inside_this_time) {
ALWAYS_ASSERT(batch_results[i]);
} else {
info.false_positives_ += batch_results[i];
}
}
} else {
for (uint32_t i = 0; i < batch_size; ++i) {
bool may_match;
if (FLAGS_use_plain_table_bloom) {
if (dry_run) {
dry_run_hash += dry_run_hash_fn(batch_slices[i]);
may_match = true;
} else {
uint32_t hash = GetSliceHash(batch_slices[i]);
may_match = info.plain_table_bloom_->MayContainHash(hash);
}
} else if (FLAGS_use_full_block_reader) {
if (dry_run) {
dry_run_hash += dry_run_hash_fn(batch_slices[i]);
may_match = true;
} else {
may_match = info.full_block_reader_->KeyMayMatch(
batch_slices[i],
/*prefix_extractor=*/nullptr,
/*block_offset=*/ROCKSDB_NAMESPACE::kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr);
}
} else {
if (dry_run) {
dry_run_hash += dry_run_hash_fn(batch_slices[i]);
may_match = true;
} else {
may_match = info.reader_->MayMatch(batch_slices[i]);
}
}
if (inside_this_time) {
ALWAYS_ASSERT(may_match);
} else {
info.false_positives_ += may_match;
}
}
}
}
uint64_t elapsed_nanos = timer.ElapsedNanos();
double ns = double(elapsed_nanos) / max_queries;
if (!FLAGS_quick) {
if (dry_run) {
// Printing part of hash prevents dry run components from being optimized
// away by compiler
std::cout << " Dry run (" << std::hex << (dry_run_hash & 0xfffff)
<< std::dec << ") ";
} else {
std::cout << " Gross filter ";
}
std::cout << "ns/op: " << ns << std::endl;
}
if (!dry_run) {
fp_rate_report_.str("");
uint64_t q = 0;
uint64_t fp = 0;
double worst_fp_rate = 0.0;
double best_fp_rate = 1.0;
for (auto &info : infos_) {
q += info.outside_queries_;
fp += info.false_positives_;
if (info.outside_queries_ > 0) {
double fp_rate = double(info.false_positives_) / info.outside_queries_;
worst_fp_rate = std::max(worst_fp_rate, fp_rate);
best_fp_rate = std::min(best_fp_rate, fp_rate);
}
}
fp_rate_report_ << " Average FP rate %: " << 100.0 * fp / q << std::endl;
if (!FLAGS_quick && !FLAGS_best_case) {
fp_rate_report_ << " Worst FP rate %: " << 100.0 * worst_fp_rate
<< std::endl;
fp_rate_report_ << " Best FP rate %: " << 100.0 * best_fp_rate
<< std::endl;
fp_rate_report_ << " Best possible bits/key: "
<< -std::log(double(fp) / q) / std::log(2.0) << std::endl;
}
}
return ns;
}