double FilterBench::RandomQueryTest()

in util/filter_bench.cc [589:787]


double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
                                    TestMode mode) {
  for (auto &info : infos_) {
    info.outside_queries_ = 0;
    info.false_positives_ = 0;
  }

  auto dry_run_hash_fn = DryRunNoHash;
  if (!FLAGS_net_includes_hashing) {
    if (FLAGS_impl < 2 || FLAGS_use_plain_table_bloom) {
      dry_run_hash_fn = DryRunHash32;
    } else {
      dry_run_hash_fn = DryRunHash64;
    }
  }

  uint32_t num_infos = static_cast<uint32_t>(infos_.size());
  uint32_t dry_run_hash = 0;
  uint64_t max_queries = static_cast<uint64_t>(m_queries_ * 1000000 + 0.50);
  // Some filters may be considered secondary in order to implement skewed
  // queries. num_primary_filters is the number that are to be treated as
  // equal, and any remainder will be treated as secondary.
  uint32_t num_primary_filters = num_infos;
  // The proportion (when divided by 2^32 - 1) of filter queries going to
  // the primary filters (default = all). The remainder of queries are
  // against secondary filters.
  uint32_t primary_filter_threshold = 0xffffffff;
  if (mode == kSingleFilter) {
    // 100% of queries to 1 filter
    num_primary_filters = 1;
  } else if (mode == kFiftyOneFilter) {
    if (num_infos < 50) {
      return 0.0;  // skip
    }
    // 50% of queries
    primary_filter_threshold /= 2;
    // to 1% of filters
    num_primary_filters = (num_primary_filters + 99) / 100;
  } else if (mode == kEightyTwentyFilter) {
    if (num_infos < 5) {
      return 0.0;  // skip
    }
    // 80% of queries
    primary_filter_threshold = primary_filter_threshold / 5 * 4;
    // to 20% of filters
    num_primary_filters = (num_primary_filters + 4) / 5;
  } else if (mode == kRandomFilter) {
    if (num_infos == 1) {
      return 0.0;  // skip
    }
  }
  uint32_t batch_size = 1;
  std::unique_ptr<Slice[]> batch_slices;
  std::unique_ptr<Slice *[]> batch_slice_ptrs;
  std::unique_ptr<bool[]> batch_results;
  if (mode == kBatchPrepared || mode == kBatchUnprepared) {
    batch_size = static_cast<uint32_t>(kms_.size());
  }

  batch_slices.reset(new Slice[batch_size]);
  batch_slice_ptrs.reset(new Slice *[batch_size]);
  batch_results.reset(new bool[batch_size]);
  for (uint32_t i = 0; i < batch_size; ++i) {
    batch_results[i] = false;
    batch_slice_ptrs[i] = &batch_slices[i];
  }

  ROCKSDB_NAMESPACE::StopWatchNano timer(
      ROCKSDB_NAMESPACE::SystemClock::Default().get(), true);

  for (uint64_t q = 0; q < max_queries; q += batch_size) {
    bool inside_this_time = random_.Next() <= inside_threshold;

    uint32_t filter_index;
    if (random_.Next() <= primary_filter_threshold) {
      filter_index = random_.Uniformish(num_primary_filters);
    } else {
      // secondary
      filter_index = num_primary_filters +
                     random_.Uniformish(num_infos - num_primary_filters);
    }
    FilterInfo &info = infos_[filter_index];
    for (uint32_t i = 0; i < batch_size; ++i) {
      if (inside_this_time) {
        batch_slices[i] =
            kms_[i].Get(info.filter_id_, random_.Uniformish(info.keys_added_));
      } else {
        batch_slices[i] =
            kms_[i].Get(info.filter_id_, random_.Uniformish(info.keys_added_) |
                                             uint32_t{0x80000000});
        info.outside_queries_++;
      }
    }
    // TODO: implement batched interface to full block reader
    // TODO: implement batched interface to plain table bloom
    if (mode == kBatchPrepared && !FLAGS_use_full_block_reader &&
        !FLAGS_use_plain_table_bloom) {
      for (uint32_t i = 0; i < batch_size; ++i) {
        batch_results[i] = false;
      }
      if (dry_run) {
        for (uint32_t i = 0; i < batch_size; ++i) {
          batch_results[i] = true;
          dry_run_hash += dry_run_hash_fn(batch_slices[i]);
        }
      } else {
        info.reader_->MayMatch(batch_size, batch_slice_ptrs.get(),
                               batch_results.get());
      }
      for (uint32_t i = 0; i < batch_size; ++i) {
        if (inside_this_time) {
          ALWAYS_ASSERT(batch_results[i]);
        } else {
          info.false_positives_ += batch_results[i];
        }
      }
    } else {
      for (uint32_t i = 0; i < batch_size; ++i) {
        bool may_match;
        if (FLAGS_use_plain_table_bloom) {
          if (dry_run) {
            dry_run_hash += dry_run_hash_fn(batch_slices[i]);
            may_match = true;
          } else {
            uint32_t hash = GetSliceHash(batch_slices[i]);
            may_match = info.plain_table_bloom_->MayContainHash(hash);
          }
        } else if (FLAGS_use_full_block_reader) {
          if (dry_run) {
            dry_run_hash += dry_run_hash_fn(batch_slices[i]);
            may_match = true;
          } else {
            may_match = info.full_block_reader_->KeyMayMatch(
                batch_slices[i],
                /*prefix_extractor=*/nullptr,
                /*block_offset=*/ROCKSDB_NAMESPACE::kNotValid,
                /*no_io=*/false, /*const_ikey_ptr=*/nullptr,
                /*get_context=*/nullptr,
                /*lookup_context=*/nullptr);
          }
        } else {
          if (dry_run) {
            dry_run_hash += dry_run_hash_fn(batch_slices[i]);
            may_match = true;
          } else {
            may_match = info.reader_->MayMatch(batch_slices[i]);
          }
        }
        if (inside_this_time) {
          ALWAYS_ASSERT(may_match);
        } else {
          info.false_positives_ += may_match;
        }
      }
    }
  }

  uint64_t elapsed_nanos = timer.ElapsedNanos();
  double ns = double(elapsed_nanos) / max_queries;

  if (!FLAGS_quick) {
    if (dry_run) {
      // Printing part of hash prevents dry run components from being optimized
      // away by compiler
      std::cout << "    Dry run (" << std::hex << (dry_run_hash & 0xfffff)
                << std::dec << ") ";
    } else {
      std::cout << "    Gross filter    ";
    }
    std::cout << "ns/op: " << ns << std::endl;
  }

  if (!dry_run) {
    fp_rate_report_.str("");
    uint64_t q = 0;
    uint64_t fp = 0;
    double worst_fp_rate = 0.0;
    double best_fp_rate = 1.0;
    for (auto &info : infos_) {
      q += info.outside_queries_;
      fp += info.false_positives_;
      if (info.outside_queries_ > 0) {
        double fp_rate = double(info.false_positives_) / info.outside_queries_;
        worst_fp_rate = std::max(worst_fp_rate, fp_rate);
        best_fp_rate = std::min(best_fp_rate, fp_rate);
      }
    }
    fp_rate_report_ << "    Average FP rate %: " << 100.0 * fp / q << std::endl;
    if (!FLAGS_quick && !FLAGS_best_case) {
      fp_rate_report_ << "    Worst   FP rate %: " << 100.0 * worst_fp_rate
                      << std::endl;
      fp_rate_report_ << "    Best    FP rate %: " << 100.0 * best_fp_rate
                      << std::endl;
      fp_rate_report_ << "    Best possible bits/key: "
                      << -std::log(double(fp) / q) / std::log(2.0) << std::endl;
    }
  }
  return ns;
}