in src/fi_wrapper.cpp [44:113]
void bind_fi_sketch(nb::module_ &m, const char* name) {
  // Registers a Python class called `name` in module `m` that wraps
  // datasketches::frequent_items_sketch<T, W, H, E>.
  // T, W, H and E are template parameters declared outside this function body;
  // they are forwarded unchanged to frequent_items_sketch.
  using namespace datasketches;
  using sketch_t = frequent_items_sketch<T, W, H, E>;

  auto fi_class = nb::class_<sketch_t>(m, name)
    .def(nb::init<uint8_t>(), nb::arg("lg_max_k"),
         "Creates an instance of the sketch\n\n"
         ":param lg_max_k: base 2 logarithm of the maximum size of the internal hash map of the sketch. Maximum "
         "capacity is 0.75 of this value, which is the maximum number of distinct items the sketch can contain.\n"
         ":type lg_max_k: int\n"
    )
    // __copy__ goes through the sketch's copy constructor.
    .def("__copy__", [](const sketch_t& sk) { return sketch_t(sk); })
    // __str__ calls to_string() with its default arguments.
    .def("__str__", [](const sketch_t& sk) { return sk.to_string(); },
         "Produces a string summary of the sketch")
    .def("to_string", &sketch_t::to_string, nb::arg("print_items")=false,
         "Produces a string summary of the sketch")
    // static_cast picks the (const T&, uint64_t) overload of update().
    .def("update",
         static_cast<void (sketch_t::*)(const T&, uint64_t)>(&sketch_t::update),
         nb::arg("item"), nb::arg("weight")=1,
         "Updates the sketch with the given string and, optionally, a weight")
    // static_cast picks the const-reference overload of merge().
    .def("merge",
         static_cast<void (sketch_t::*)(const sketch_t&)>(&sketch_t::merge),
         "Merges the given sketch into this one")
    .def("is_empty", &sketch_t::is_empty,
         "Returns True if the sketch is empty, otherwise False")
    .def_prop_ro("num_active_items", &sketch_t::get_num_active_items,
         "The number of active items in the sketch")
    .def_prop_ro("total_weight", &sketch_t::get_total_weight,
         "The sum of the weights (frequencies) in the stream seen so far by the sketch")
    .def("get_estimate", &sketch_t::get_estimate, nb::arg("item"),
         "Returns the estimate of the weight (frequency) of the given item.\n"
         "Note: The true frequency of a item would be the sum of the counts as a result of the "
         "two update functions.")
    .def("get_lower_bound", &sketch_t::get_lower_bound, nb::arg("item"),
         "Returns the guaranteed lower bound weight (frequency) of the given item.")
    .def("get_upper_bound", &sketch_t::get_upper_bound, nb::arg("item"),
         "Returns the guaranteed upper bound weight (frequency) of the given item.")
    // get_epsilon is overloaded (the one-argument form is bound separately
    // below); static_cast selects the zero-argument const member.
    .def_prop_ro("epsilon",
         static_cast<double (sketch_t::*)() const>(&sketch_t::get_epsilon),
         "The epsilon value used by the sketch to compute error")
    .def(
        "get_frequent_items",
        [](const sketch_t& sk, frequent_items_error_type err_type, uint64_t threshold) {
          // A threshold of 0 means "use the sketch's own maximum error".
          if (threshold == 0) threshold = sk.get_maximum_error();
          nb::list list;
          const auto rows = sk.get_frequent_items(err_type, threshold);
          // Bind by const reference: each row is only read, so copying it
          // (and the item it holds) per iteration is unnecessary.
          for (const auto& row : rows) {
            list.append(nb::make_tuple(
                row.get_item(),
                row.get_estimate(),
                row.get_lower_bound(),
                row.get_upper_bound())
            );
          }
          return list;
        },
        nb::arg("err_type"), nb::arg("threshold")=0,
        "Returns a list of (item, estimate, lower_bound, upper_bound) tuples for the frequent items "
        "selected by the given error type and threshold.\n"
        "A threshold of 0 (the default) uses the maximum error of the sketch as the threshold."
    )
    .def_static(
        "get_epsilon_for_lg_size",
        [](uint8_t lg_max_map_size) { return sketch_t::get_epsilon(lg_max_map_size); },
        nb::arg("lg_max_map_size"),
        "Returns the epsilon value used to compute a priori error for a given log2(max_map_size)"
    )
    .def_static(
        "get_apriori_error",
        &sketch_t::get_apriori_error,
        nb::arg("lg_max_map_size"), nb::arg("estimated_total_weight"),
        "Returns the estimated a priori error given the max_map_size for the sketch and the estimated_total_stream_weight."
    );

  // serialization may need a caller-provided serde depending on the sketch type, so
  // we use a separate method to handle that appropriately based on type T.
  add_serialization(fi_class);
}