in tfx_bsl/cc/sketches/misragries_sketch.cc [439:491]
// Writes the sketch's (item, estimated count) pairs into `result`.
//
// `result` is cleared first. Every entry of `item_counts_` is emitted with
// `delta_` added to its stored count (the upper bound estimate). For FLOAT
// input each item string is passed through `Decode`; INT, RAW_STRING and UNSET
// items are emitted as stored. Entries are then ordered by descending count,
// with ties broken by ascending item string. If fewer than `num_buckets_`
// entries were produced, items of `extra_items_` that are not keys of
// `item_counts_` are appended in ascending order, each with count `delta_`,
// until `result` holds `num_buckets_` entries or the extra items run out.
//
// Returns OkStatus on success, whatever error `TFX_BSL_RETURN_IF_ERROR`
// propagates from `Decode`, or FailedPreconditionError for an input type not
// handled below. On an error return `result` keeps the unsorted entries built
// so far.
absl::Status MisraGriesSketch::GetCounts(
    std::vector<std::pair<std::string, double>>& result) const {
  using ItemCount = std::pair<std::string, double>;

  // Start from the tracked items; adding delta_ to each stored count yields
  // the upper bound estimate.
  result.clear();
  result.reserve(item_counts_.size());
  for (const auto& tracked : item_counts_) {
    result.emplace_back(tracked.first, tracked.second + delta_);
  }

  // Decoding happens before sorting, so tie-breaking compares decoded strings.
  switch (input_type_) {
    case InputType::INT:
    case InputType::RAW_STRING:
    case InputType::UNSET:
      // INT, RAW_STRING, and UNSET (empty sketch) do not need to be decoded.
      break;
    case InputType::FLOAT:
      for (auto& entry : result) {
        TFX_BSL_RETURN_IF_ERROR(Decode(&entry.first));
      }
      break;
    default:
      return absl::FailedPreconditionError(absl::StrCat(
          "unhandled input type ", InputType::Type_Name(input_type_)));
  }

  // Largest estimate first; equal estimates are ordered by item string so the
  // output is deterministic.
  const auto by_count_desc_then_item = [](const ItemCount& lhs,
                                          const ItemCount& rhs) {
    if (lhs.second != rhs.second) {
      return lhs.second > rhs.second;
    }
    return lhs.first < rhs.first;
  };
  std::sort(result.begin(), result.end(), by_count_desc_then_item);

  // Fill the `result` up to `num_buckets_` items using `extra_items_`.
  if (result.size() < num_buckets_) {
    // Gather the extra items that are not already reported above.
    std::vector<std::string> fill_candidates;
    for (const auto& extra : extra_items_) {
      const bool already_tracked =
          item_counts_.find(extra) != item_counts_.end();
      if (already_tracked) {
        continue;
      }
      fill_candidates.emplace_back(extra);
    }
    std::sort(fill_candidates.begin(), fill_candidates.end());

    // NOTE(review): these items are appended exactly as stored, without going
    // through Decode even for FLOAT input -- confirm that is intended.
    for (const auto& candidate : fill_candidates) {
      result.emplace_back(candidate, delta_);
      if (result.size() == num_buckets_) {
        break;
      }
    }
  }
  return absl::OkStatus();
}