void FilterBench::Go()

in util/filter_bench.cc [342:587]


void FilterBench::Go() {
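  // Validate flag combinations that are unsupported or mutually exclusive.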
  if (FLAGS_use_plain_table_bloom && FLAGS_use_full_block_reader) {
    throw std::runtime_error(
        "Can't combine -use_plain_table_bloom and -use_full_block_reader");
  }
  if (FLAGS_use_plain_table_bloom) {
    if (FLAGS_impl > 1) {
      throw std::runtime_error(
          "-impl must currently be >= 0 and <= 1 for Plain table");
    }
  } else {
    if (FLAGS_impl == 1) {
      throw std::runtime_error(
          "Block-based filter not currently supported by filter_bench");
    }
    if (FLAGS_impl > 3) {
      throw std::runtime_error(
          "-impl must currently be 0, 2, or 3 for Block-based table");
    }
  }

  if (FLAGS_vary_key_count_ratio < 0.0 || FLAGS_vary_key_count_ratio > 1.0) {
    throw std::runtime_error("-vary_key_count_ratio must be >= 0.0 and <= 1.0");
  }

  // For example, with average_keys_per_filter = 100 and
  // vary_key_count_ratio = 0.1, the key count varies by up to +/- 10 keys.
  // variance_range = 21 (generating values 0..20) and variance_offset = 10,
  // so the average of (value - offset) is 0.
  const uint32_t variance_range =
      1 + 2 * static_cast<uint32_t>(FLAGS_vary_key_count_ratio *
                                    FLAGS_average_keys_per_filter);
  const uint32_t variance_offset = variance_range / 2;

  const std::vector<TestMode> &testModes =
      FLAGS_best_case ? bestCaseTestModes
                      : FLAGS_quick ? quickTestModes : allTestModes;

  m_queries_ = FLAGS_m_queries;
  double working_mem_size_mb = FLAGS_working_mem_size_mb;
  if (FLAGS_quick) {
    m_queries_ /= 7.0;
  } else if (FLAGS_best_case) {
    m_queries_ /= 3.0;
    working_mem_size_mb /= 10.0;
  }

  std::cout << "Building..." << std::endl;

  std::unique_ptr<BuiltinFilterBitsBuilder> builder;

  size_t total_memory_used = 0;
  size_t total_size = 0;
  size_t total_keys_added = 0;
#ifdef PREDICT_FP_RATE
  double weighted_predicted_fp_rate = 0.0;
#endif
  size_t max_total_keys;
  size_t max_mem;
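  // Only one stopping budget is active: a total key count when
  // -m_keys_total_max is set, otherwise a working memory size.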
  if (FLAGS_m_keys_total_max > 0) {
    max_total_keys = static_cast<size_t>(1000000 * FLAGS_m_keys_total_max);
    max_mem = SIZE_MAX;
  } else {
    max_total_keys = SIZE_MAX;
    max_mem = static_cast<size_t>(1024 * 1024 * working_mem_size_mb);
  }

  ROCKSDB_NAMESPACE::StopWatchNano timer(
      ROCKSDB_NAMESPACE::SystemClock::Default().get(), true);

  infos_.clear();
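  // Build filters until the active budget (keys or memory) is exhausted,
  // randomizing each filter's key count within the configured variance.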
  while ((working_mem_size_mb == 0 || total_size < max_mem) &&
         total_keys_added < max_total_keys) {
    uint32_t filter_id = random_.Next();
    uint32_t keys_to_add = FLAGS_average_keys_per_filter +
                           FastRange32(random_.Next(), variance_range) -
                           variance_offset;
    if (max_total_keys - total_keys_added < keys_to_add) {
      keys_to_add = static_cast<uint32_t>(max_total_keys - total_keys_added);
    }
    infos_.emplace_back();
    FilterInfo &info = infos_.back();
    info.filter_id_ = filter_id;
    info.keys_added_ = keys_to_add;
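    // Two build paths: PlainTable's bloom filter (keys added by hash) or the
    // block-based table path via a FilterBitsBuilder (keys added by slice).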
    if (FLAGS_use_plain_table_bloom) {
      info.plain_table_bloom_.reset(new PlainTableBloomV1());
      info.plain_table_bloom_->SetTotalBits(
          &arena_, static_cast<uint32_t>(keys_to_add * FLAGS_bits_per_key),
          FLAGS_impl, 0 /*huge_page*/, nullptr /*logger*/);
      for (uint32_t i = 0; i < keys_to_add; ++i) {
        uint32_t hash = GetSliceHash(kms_[0].Get(filter_id, i));
        info.plain_table_bloom_->AddHash(hash);
      }
      info.filter_ = info.plain_table_bloom_->GetRawData();
    } else {
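      // Lazily create a FilterBitsBuilder; it is reused across filters
      // unless -new_builder asks for a fresh one per filter.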
      if (!builder) {
        builder.reset(
            static_cast_with_check<BuiltinFilterBitsBuilder>(GetBuilder()));
      }
      for (uint32_t i = 0; i < keys_to_add; ++i) {
        builder->AddKey(kms_[0].Get(filter_id, i));
      }
      info.filter_ =
          builder->Finish(&info.owner_, &info.filter_construction_status);
      if (info.filter_construction_status.ok()) {
        info.filter_construction_status =
            builder->MaybePostVerify(info.filter_);
      }
      if (!info.filter_construction_status.ok()) {
        PrintError(info.filter_construction_status.ToString().c_str());
      }
#ifdef PREDICT_FP_RATE
      weighted_predicted_fp_rate +=
          keys_to_add *
          builder->EstimatedFpRate(keys_to_add, info.filter_.size());
#endif
      if (FLAGS_new_builder) {
        builder.reset();
      }
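      // Set up both query paths over the finished filter: a raw
      // FilterBitsReader and a FullFilterBlockReader (exercised when
      // -use_full_block_reader is set).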
      info.reader_.reset(
          table_options_.filter_policy->GetFilterBitsReader(info.filter_));
      CachableEntry<ParsedFullFilterBlock> block(
          new ParsedFullFilterBlock(table_options_.filter_policy.get(),
                                    BlockContents(info.filter_)),
          nullptr /* cache */, nullptr /* cache_handle */,
          true /* own_value */);
      info.full_block_reader_.reset(
          new FullFilterBlockReader(table_.get(), std::move(block)));
    }
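    // Track total filter bytes and, where malloc_usable_size is available,
    // the actual allocation size so fragmentation can be reported.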
    total_size += info.filter_.size();
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
    total_memory_used +=
        malloc_usable_size(const_cast<char *>(info.filter_.data()));
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
    total_keys_added += keys_to_add;
  }

  uint64_t elapsed_nanos = timer.ElapsedNanos();
  double ns = static_cast<double>(elapsed_nanos) / total_keys_added;
  std::cout << "Build avg ns/key: " << ns << std::endl;
  std::cout << "Number of filters: " << infos_.size() << std::endl;
  std::cout << "Total size (MB): " << total_size / 1024.0 / 1024.0 << std::endl;
  if (total_memory_used > 0) {
    std::cout << "Reported total allocated memory (MB): "
              << total_memory_used / 1024.0 / 1024.0 << std::endl;
    std::cout << "Reported internal fragmentation: "
              << (total_memory_used - total_size) * 100.0 / total_size << "%"
              << std::endl;
  }

  double bpk = total_size * 8.0 / total_keys_added;
  std::cout << "Bits/key stored: " << bpk << std::endl;
#ifdef PREDICT_FP_RATE
  std::cout << "Predicted FP rate %: "
            << 100.0 * (weighted_predicted_fp_rate / total_keys_added)
            << std::endl;
#endif
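  // Sanity-check FP rates: ~2^-bpk is the information-theoretic best
  // possible rate for bpk bits per key, and tolerable_rate is a looser
  // heuristic threshold; exceeding it fails the run unless
  // -allow_bad_fp_rate is set.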
  if (!FLAGS_quick && !FLAGS_best_case) {
    double tolerable_rate = std::pow(2.0, -(bpk - 1.0) / (1.4 + bpk / 50.0));
    std::cout << "Best possible FP rate %: " << 100.0 * std::pow(2.0, -bpk)
              << std::endl;
    std::cout << "Tolerable FP rate %: " << 100.0 * tolerable_rate << std::endl;

    std::cout << "----------------------------" << std::endl;
    std::cout << "Verifying..." << std::endl;

    uint32_t outside_q_per_f =
        static_cast<uint32_t>(m_queries_ * 1000000 / infos_.size());
    uint64_t fps = 0;
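    // Every key that was added must match (no false negatives). Keys queried
    // with the high bit set in the index were never added, so any match
    // there counts as a false positive.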
    for (uint32_t i = 0; i < infos_.size(); ++i) {
      FilterInfo &info = infos_[i];
      for (uint32_t j = 0; j < info.keys_added_; ++j) {
        if (FLAGS_use_plain_table_bloom) {
          uint32_t hash = GetSliceHash(kms_[0].Get(info.filter_id_, j));
          ALWAYS_ASSERT(info.plain_table_bloom_->MayContainHash(hash));
        } else {
          ALWAYS_ASSERT(
              info.reader_->MayMatch(kms_[0].Get(info.filter_id_, j)));
        }
      }
      for (uint32_t j = 0; j < outside_q_per_f; ++j) {
        if (FLAGS_use_plain_table_bloom) {
          uint32_t hash =
              GetSliceHash(kms_[0].Get(info.filter_id_, j | 0x80000000));
          fps += info.plain_table_bloom_->MayContainHash(hash);
        } else {
          fps += info.reader_->MayMatch(
              kms_[0].Get(info.filter_id_, j | 0x80000000));
        }
      }
    }
    std::cout << " No FNs :)" << std::endl;
    double prelim_rate =
        static_cast<double>(fps) / outside_q_per_f / infos_.size();
    std::cout << " Prelim FP rate %: " << (100.0 * prelim_rate) << std::endl;

    if (!FLAGS_allow_bad_fp_rate) {
      ALWAYS_ASSERT(prelim_rate < tolerable_rate);
    }
  }

  std::cout << "----------------------------" << std::endl;
  std::cout << "Mixed inside/outside queries..." << std::endl;
  // 50% each inside and outside
  uint32_t inside_threshold = UINT32_MAX / 2;
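  // For each test mode, replay the same seeded query stream twice (once for
  // real, once as a dry run) and report the difference to net out key
  // generation and other harness overhead.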
  for (TestMode tm : testModes) {
    random_.Seed(FLAGS_seed + 1);
    double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm);
    random_.Seed(FLAGS_seed + 1);
    double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm);
    std::cout << "  " << TestModeToString(tm) << " net ns/op: " << (f - d)
              << std::endl;
  }

  if (!FLAGS_quick) {
    std::cout << "----------------------------" << std::endl;
    std::cout << "Inside queries (mostly)..." << std::endl;
    // Do about 95% inside queries rather than 100% so that the branch
    // predictor can't give itself an artificially large advantage.
    inside_threshold = UINT32_MAX / 20 * 19;
    for (TestMode tm : testModes) {
      random_.Seed(FLAGS_seed + 1);
      double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm);
      random_.Seed(FLAGS_seed + 1);
      double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm);
      std::cout << "  " << TestModeToString(tm) << " net ns/op: " << (f - d)
                << std::endl;
    }

    std::cout << "----------------------------" << std::endl;
    std::cout << "Outside queries (mostly)..." << std::endl;
    // Do about 95% outside queries rather than 100% so that the branch
    // predictor can't give itself an artificially large advantage.
    inside_threshold = UINT32_MAX / 20;
    for (TestMode tm : testModes) {
      random_.Seed(FLAGS_seed + 2);
      double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm);
      random_.Seed(FLAGS_seed + 2);
      double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm);
      std::cout << "  " << TestModeToString(tm) << " net ns/op: " << (f - d)
                << std::endl;
    }
  }
  std::cout << fp_rate_report_.str();

  std::cout << "----------------------------" << std::endl;
  std::cout << "Done. (For more info, run with -legend or -help.)" << std::endl;
}