in ingestion-core/src/main/java/com/mozilla/telemetry/ingestion/core/transform/PubsubMessageToObjectNode.java [754:809]
/**
 * Encodes one histogram object in the most compact form that is currently enabled.
 *
 * <p>The input is validated first: {@code histogram_type} must lie in 0..5 and {@code sum} must
 * be non-negative (each falls back to -1 when absent or not convertible to a number). A failed
 * check bumps the matching invalid-histogram counter and returns the plain JSON encoding.
 *
 * <p>The only compact form active today is the use-counter one; every other valid histogram is
 * returned via {@code jsonHistogramEncoding}.
 *
 * @param o histogram JSON object; the fields "histogram_type", "sum" and "values" are read
 * @param fieldName field name associated with the histogram; only its "use_counter2" prefix is
 *     inspected, and only for type 2 histograms
 * @return the decimal text of {@code sum} for a consistent use counter, otherwise the result of
 *     {@code jsonHistogramEncoding(o)}
 */
private TextNode compactHistogramEncoding(JsonNode o, String fieldName) {
  // NOTE: the locals histogramType, sum and values keep these exact names because the disabled
  // encodings at the bottom of this method refer to them.
  final int histogramType = o.path("histogram_type").asInt(-1);
  final boolean typeInRange = 0 <= histogramType && histogramType <= 5;
  if (!typeInRange) {
    incrementInvalidHistogramType();
    return jsonHistogramEncoding(o);
  }

  final long sum = o.path("sum").asLong(-1);
  final boolean sumIsValid = sum >= 0;
  if (!sumIsValid) {
    incrementInvalidHistogramSum();
    return jsonHistogramEncoding(o);
  }

  final JsonNode values = o.path("values");

  // The type test comes first so fieldName is only touched for type 2 (boolean) histograms.
  final boolean isUseCounter = histogramType == 2 && fieldName.startsWith("use_counter2");
  if (isUseCounter) {
    // Histograms named as "use_counter" are reported as type 2 (boolean), but only ever have
    // a non-zero value in the "1" (true) bucket (and the "sum" field should match this count).
    // They can be encoded as a textual representation of that single number without any loss
    // of information, and since use counters make up the majority of histogram data volume,
    // this optimization case is the most important one. See
    // https://firefox-source-docs.mozilla.org/toolkit/components/telemetry/collection/use-counters.html
    final long trueBucket = values.path("1").asLong(0);
    if (trueBucket != sum) {
      // The "1" bucket and the sum disagree, so the single-number form would lose information.
      incrementInvalidHistogramUseCounter();
      return jsonHistogramEncoding(o);
    }
    return TextNode.valueOf(String.valueOf(sum));
  }

  // All remaining valid histograms keep the plain JSON encoding for now.
  return jsonHistogramEncoding(o);
  // TODO: Uncomment the following section and related test cases once we have transitioned
  // analysis use cases to tolerate the additional encodings.
  // spotless:off
  /*
  else if (histogramType == 4) {
    return TextNode.valueOf(Long.toString(sum));
  } else if (histogramType == 2) {
    // Type 2 are "boolean" histograms where bucket "0" is a count of false values and
    // bucket "1" is a count of true values.
    return TextNode.valueOf(
        String.format("%d,%d", values.path("0").longValue(), values.path("1").longValue()));
  } else {
    final int bucketCount = o.path("bucket_count").asInt(-1);
    final long rangeLo = o.path("range").path(0).asLong(-1);
    final long rangeHi = o.path("range").path(1).asLong(-1);
    final String valString = Json.asString(values) //
        .replace("{", "") //
        .replace("}", "") //
        .replace("\"", "");
    if (bucketCount <= 0 || rangeLo < 0 || rangeHi < 0) {
      incrementInvalidHistogramRange();
      return jsonHistogramEncoding(o);
    }
    return TextNode.valueOf(String.format("%d;%d;%d;%d,%d;%s", //
        bucketCount, histogramType, sum, rangeLo, rangeHi, valString));
  }
  */
  // spotless:on
}