in python/src/fi_wrapper.cpp [41:104]
// Registers a Python class called `name` on module `m` that wraps
// datasketches::frequent_items_sketch<T, W, H, E>.
//
// The class exposes a constructor taking lg_max_k, string conversion, update,
// merge, the basic accessors (is_empty, get_num_active_items, get_total_weight),
// per-item estimate and bounds, the epsilon / a priori error helpers, and
// get_frequent_items. Serialization bindings are attached afterwards by
// add_serialization(), which is defined elsewhere.
void bind_fi_sketch(py::module &m, const char* name) {
  using namespace datasketches;
  // Local shorthand so the member-pointer expressions below stay readable.
  using sketch_type = frequent_items_sketch<T, W, H, E>;

  auto fi_class = py::class_<sketch_type>(m, name)
    .def(py::init<uint8_t>(), py::arg("lg_max_k"))
    .def("__str__", &sketch_type::to_string, py::arg("print_items")=false,
         "Produces a string summary of the sketch")
    .def("to_string", &sketch_type::to_string, py::arg("print_items")=false,
         "Produces a string summary of the sketch")
    // update, merge and get_epsilon are cast to an explicit member-function
    // pointer type to pick the specific overload that gets bound.
    .def("update", static_cast<void (sketch_type::*)(const T&, uint64_t)>(&sketch_type::update),
         py::arg("item"), py::arg("weight")=1,
         "Updates the sketch with the given string and, optionally, a weight")
    .def("merge", static_cast<void (sketch_type::*)(const sketch_type&)>(&sketch_type::merge),
         "Merges the given sketch into this one")
    .def("is_empty", &sketch_type::is_empty,
         "Returns True if the sketch is empty, otherwise False")
    .def("get_num_active_items", &sketch_type::get_num_active_items,
         "Returns the number of active items in the sketch")
    .def("get_total_weight", &sketch_type::get_total_weight,
         "Returns the sum of the weights (frequencies) in the stream seen so far by the sketch")
    .def("get_estimate", &sketch_type::get_estimate, py::arg("item"),
         "Returns the estimate of the weight (frequency) of the given item.\n"
         "Note: The true frequency of a item would be the sum of the counts as a result of the "
         "two update functions.")
    .def("get_lower_bound", &sketch_type::get_lower_bound, py::arg("item"),
         "Returns the guaranteed lower bound weight (frequency) of the given item.")
    .def("get_upper_bound", &sketch_type::get_upper_bound, py::arg("item"),
         "Returns the guaranteed upper bound weight (frequency) of the given item.")
    .def("get_sketch_epsilon", static_cast<double (sketch_type::*)() const>(&sketch_type::get_epsilon),
         "Returns the epsilon value used by the sketch to compute error")
    .def(
      "get_frequent_items",
      [](const sketch_type& sk, frequent_items_error_type err_type, uint64_t threshold) {
        // 0 is the "not specified" sentinel: fall back to the sketch's own maximum error.
        if (threshold == 0) threshold = sk.get_maximum_error();
        py::list list;
        const auto rows = sk.get_frequent_items(err_type, threshold);
        // Iterate by const reference so each row (and the item it holds) is not
        // copied just to be read.
        for (const auto& row : rows) {
          list.append(py::make_tuple(
            row.get_item(),
            row.get_estimate(),
            row.get_lower_bound(),
            row.get_upper_bound())
          );
        }
        return list;
      },
      py::arg("err_type"), py::arg("threshold")=0,
      "Returns a list of (item, estimate, lower_bound, upper_bound) tuples for the frequent items "
      "selected by the given err_type and threshold.\n"
      "A threshold of 0 (the default) means the sketch's maximum error is used as the threshold."
    )
    .def_static(
      "get_epsilon_for_lg_size",
      [](uint8_t lg_max_map_size) { return sketch_type::get_epsilon(lg_max_map_size); },
      py::arg("lg_max_map_size"),
      "Returns the epsilon value used to compute a priori error for a given log2(max_map_size)"
    )
    .def_static(
      "get_apriori_error",
      &sketch_type::get_apriori_error,
      py::arg("lg_max_map_size"), py::arg("estimated_total_weight"),
      "Returns the estimated a priori error given the lg_max_map_size for the sketch and the estimated_total_weight."
    );

  // serialization may need a caller-provided serde depending on the sketch type, so
  // we use a separate method to handle that appropriately based on type T.
  add_serialization(fi_class);
}