in src/tdigest_wrapper.cpp [36:105]
// Registers a Python class called `name` in module `m` that wraps
// datasketches::tdigest<T>, then attaches the serialization and vectorized
// update bindings through add_serialization<T> / add_vector_update<T>
// (both defined elsewhere in the project).
//
// NOTE(review): T is the template parameter of this function; the
// `template<...>` header sits immediately before this line, outside this
// block.
//
// m    - nanobind module that receives the new class.
// name - Python-visible name of the class.
void bind_tdigest(nb::module_ &m, const char* name) {
  using namespace datasketches;

  using sketch_t = tdigest<T>;
  // Exact member-function-pointer types used to pick the intended overloads.
  // static_cast (instead of a C-style cast) makes a signature mismatch a
  // compile error rather than a silent reinterpretation.
  using update_fn = void (sketch_t::*)(T);
  using merge_fn = void (sketch_t::*)(const sketch_t&);

  auto tdigest_class = nb::class_<sketch_t>(m, name)
    .def(nb::init<uint16_t>(), nb::arg("k")=sketch_t::DEFAULT_K,
         "Creates a tdigest instance with the given value of k.\n\n"
         ":param k: Controls the size/accuracy trade-off of the sketch. Default is 200.\n"
         ":type k: int, optional"
    )
    // copy.copy() support: returns a copy-constructed sketch.
    .def("__copy__", [](const sketch_t& sk) { return sketch_t(sk); })
    .def("update", static_cast<update_fn>(&sketch_t::update), nb::arg("item"),
         "Updates the sketch with the given value")
    .def("merge", static_cast<merge_fn>(&sketch_t::merge), nb::arg("sketch"),
         "Merges the provided sketch into this one")
    // str(sketch) uses to_string() with its default arguments.
    .def("__str__", [](const sketch_t& sk) { return sk.to_string(); },
         "Produces a string summary of the sketch")
    .def("to_string", &sketch_t::to_string, nb::arg("print_centroids")=false,
         "Produces a string summary of the sketch")
    .def("is_empty", &sketch_t::is_empty,
         "Returns True if the sketch is empty, otherwise False")
    // Exposed as a read-only property rather than a method.
    .def_prop_ro("k", &sketch_t::get_k,
         "The configured parameter k")
    .def("get_total_weight", &sketch_t::get_total_weight,
         "The total weight processed by the sketch")
    .def("compress", &sketch_t::compress,
         "Process buffered values and merge centroids, if necessary")
    .def("get_min_value", &sketch_t::get_min_value,
         "Returns the minimum value from the stream. If empty, throws a RuntimeError")
    .def("get_max_value", &sketch_t::get_max_value,
         "Returns the maximum value from the stream. If empty, throws a RuntimeError")
    .def("get_rank", &sketch_t::get_rank, nb::arg("value"),
         "Computes the approximate normalized rank of the given value")
    .def("get_quantile", &sketch_t::get_quantile, nb::arg("rank"),
         "Returns an approximation to the data value "
         "associated with the given rank in a hypothetical sorted "
         "version of the input stream so far.\n")
    .def("get_serialized_size_bytes", &sketch_t::get_serialized_size_bytes,
         nb::arg("with_buffer")=false,
         "Returns the size of the serialized sketch, in bytes")
    // get_PMF/get_CDF take a pointer + length, so the Python sequence is
    // received as a std::vector<T> and forwarded as (data, size).
    .def(
        "get_pmf",
        [](const sketch_t& sk, const std::vector<T>& split_points) {
          return sk.get_PMF(split_points.data(), split_points.size());
        },
        nb::arg("split_points"),
        "Returns an approximation to the Probability Mass Function (PMF) of the input stream "
        "given a set of split points (values).\n"
        "If the sketch is empty this returns an empty vector.\n"
        "split_points is an array of m unique, monotonically increasing float values "
        "that divide the real number line into m+1 consecutive disjoint intervals.\n"
        "It is not necessary to include either the min or max values in these split points."
    )
    .def(
        "get_cdf",
        [](const sketch_t& sk, const std::vector<T>& split_points) {
          return sk.get_CDF(split_points.data(), split_points.size());
        },
        nb::arg("split_points"),
        "Returns an approximation to the Cumulative Distribution Function (CDF), which is the "
        "cumulative analog of the PMF, of the input stream given a set of split points (values).\n"
        "If the sketch is empty this returns an empty vector.\n"
        "split_points is an array of m unique, monotonically increasing float values "
        "that divide the real number line into m+1 consecutive disjoint intervals.\n"
        "It is not necessary to include either the min or max values in these split points."
    )
    ;

  // Shared helpers add the remaining (serialization / bulk update) bindings
  // onto the class object created above.
  add_serialization<T>(tdigest_class);
  add_vector_update<T>(tdigest_class);
}