void bind_fi_sketch()

in src/fi_wrapper.cpp [44:113]


// Registers a Python class called `name` on module `m` that wraps
// datasketches::frequent_items_sketch<T, W, H, E>.
//
// NOTE(review): T, W, H and E are not declared inside this function; they are
// presumably the parameters of an enclosing template header that precedes this
// definition -- confirm against the full file.
//
// m:    nanobind module that receives the class.
// name: Python-visible name of the class.
//
// Serialization-related methods are not bound here; they are attached at the
// end by add_serialization().
void bind_fi_sketch(nb::module_ &m, const char* name) {
  using namespace datasketches;

  // Single spelling of the wrapped type; every binding below refers to it.
  using sketch_t = frequent_items_sketch<T, W, H, E>;

  auto fi_class = nb::class_<sketch_t>(m, name)
    .def(nb::init<uint8_t>(), nb::arg("lg_max_k"),
         "Creates an instance of the sketch\n\n"
         ":param lg_max_k: base 2 logarithm of the maximum size of the internal hash map of the sketch. Maximum "
         "capacity is 0.75 of this value, which is the maximum number of distinct items the sketch can contain.\n"
         ":type lg_max_k: int\n"
         )
    .def("__copy__", [](const sketch_t& sk) { return sketch_t(sk); })
    .def("__str__", [](const sketch_t& sk) { return sk.to_string(); },
         "Produces a string summary of the sketch")
    .def("to_string", &sketch_t::to_string, nb::arg("print_items")=false,
         "Produces a string summary of the sketch")
    // update, merge and get_epsilon need an explicit member-pointer type so the
    // intended overload is selected; static_cast does that selection without
    // the reinterpreting fallback a C-style cast would allow.
    .def("update", static_cast<void (sketch_t::*)(const T&, uint64_t)>(&sketch_t::update),
         nb::arg("item"), nb::arg("weight")=1,
         "Updates the sketch with the given string and, optionally, a weight")
    .def("merge", static_cast<void (sketch_t::*)(const sketch_t&)>(&sketch_t::merge),
         "Merges the given sketch into this one")
    .def("is_empty", &sketch_t::is_empty,
         "Returns True if the sketch is empty, otherwise False")
    .def_prop_ro("num_active_items", &sketch_t::get_num_active_items,
         "The number of active items in the sketch")
    .def_prop_ro("total_weight", &sketch_t::get_total_weight,
         "The sum of the weights (frequencies) in the stream seen so far by the sketch")
    .def("get_estimate", &sketch_t::get_estimate, nb::arg("item"),
         "Returns the estimate of the weight (frequency) of the given item.\n"
         "Note: The true frequency of a item would be the sum of the counts as a result of the "
         "two update functions.")
    .def("get_lower_bound", &sketch_t::get_lower_bound, nb::arg("item"),
         "Returns the guaranteed lower bound weight (frequency) of the given item.")
    .def("get_upper_bound", &sketch_t::get_upper_bound, nb::arg("item"),
         "Returns the guaranteed upper bound weight (frequency) of the given item.")
    .def_prop_ro("epsilon", static_cast<double (sketch_t::*)() const>(&sketch_t::get_epsilon),
         "The epsilon value used by the sketch to compute error")
    .def(
        "get_frequent_items",
        [](const sketch_t& sk, frequent_items_error_type err_type, uint64_t threshold) {
          // A threshold of 0 (the Python-side default) means "use the sketch's
          // own maximum error" instead of a caller-chosen cut-off.
          if (threshold == 0) threshold = sk.get_maximum_error();
          const auto rows = sk.get_frequent_items(err_type, threshold);
          nb::list list;
          // Bind by const reference: a by-value loop variable would copy every
          // row (and the item it holds) only to read four getters.
          for (const auto& row : rows) {
            list.append(nb::make_tuple(
                row.get_item(),
                row.get_estimate(),
                row.get_lower_bound(),
                row.get_upper_bound())
            );
          }
          return list;
        },
        nb::arg("err_type"), nb::arg("threshold")=0,
        "Returns a list of (item, estimate, lower_bound, upper_bound) tuples for the items "
        "reported by the sketch for the given error type and threshold.\n"
        "A threshold of 0 (the default) uses the maximum error of the sketch."
    )
    .def_static(
        "get_epsilon_for_lg_size",
        [](uint8_t lg_max_map_size) { return sketch_t::get_epsilon(lg_max_map_size); },
        nb::arg("lg_max_map_size"),
        "Returns the epsilon value used to compute a priori error for a given log2(max_map_size)"
    )
    .def_static(
        "get_apriori_error",
        &sketch_t::get_apriori_error,
        nb::arg("lg_max_map_size"), nb::arg("estimated_total_weight"),
        "Returns the estimated a priori error given the max_map_size for the sketch and the estimated_total_stream_weight."
    );

  // serialization may need a caller-provided serde depending on the sketch type, so
  // we use a separate method to handle that appropriately based on type T.
  add_serialization(fi_class);
}