in pkg/export/transform.go [275:372]
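// build converts the cumulative Prometheus-style buckets collected in d into a
// Cloud Monitoring Distribution proto with explicit bounds and per-bucket counts.
//
// Illustrative sketch with hypothetical values (not taken from any real series):
// bounds [0.1, 0.5, 1, +Inf] with cumulative counts [2, 5, 5, 7] become explicit
// Bounds [0.1, 0.5, 1] and BucketCounts [2, 3, 0, 2] with Count 7, since the +Inf
// bound is dropped and its count becomes the trailing overflow bucket.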
func (d *distribution) build(lset labels.Labels) (*distribution_pb.Distribution, error) {
// The exposition format in general requires buckets to be in order, but we observed
// some cases in the wild where this was not the case.
// Sort here to gracefully handle most of those cases. This cannot handle
// all of them. Specifically, if buckets are out of order, distribution.complete() may
// return true before all buckets have been read. Then we will send a distribution
// with only a subset of buckets.
sort.Sort(d)
// Populate new values and bounds slices for the final proto as d will be returned to
// the memory pool while the proto will be enqueued for sending.
var (
bounds = make([]float64, 0, len(d.bounds))
values = make([]int64, 0, len(d.values))
prevBound, dev, mean float64
prevVal int64
)
// Some client libraries have race conditions causing a mismatch in counts across buckets and the count
// series. The most common case seems to be the count series mismatching while the buckets are consistent.
// We handle this here by always picking the +Inf bucket value.
// This helps ingest samples that would otherwise be dropped.
d.count = float64(d.values[len(d.bounds)-1])
// In principle, the count and sum series could be NaN.
// For the sum series this has been observed in the wild.
// As NaN is not a permitted mean value in Cloud Monitoring, we leave the mean at the default 0 in this case.
// For the count we override it with the +Inf bucket value above anyway and thus don't need special handling.
if !math.IsNaN(d.sum) && d.count > 0 {
mean = d.sum / d.count
}
for i, bound := range d.bounds {
if i > 0 && prevBound == bound {
// Each bound has to be higher than the previous one.
// Rarely, duplicate bounds can occur due to string-to-float imprecision
// or different representations of the same float, e.g. 1 vs 1.0.
// The GCM API rejects those, so reject them early.
prometheusSamplesDiscarded.WithLabelValues("duplicate-bucket-boundary").Add(float64(d.inputSampleCount()))
err := fmt.Errorf("invalid histogram with duplicates bounds (le label value) %s: count=%f, sum=%f, dev=%f, index=%d, bucketBound=%f, bucketPrevBound=%f",
lset, d.count, d.sum, dev, i, bound, prevBound)
return nil, err
}
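// The +Inf bound is not part of the explicit bounds; its (delta) count is still
// appended below and becomes the trailing overflow bucket. Reusing the previous
// bound makes the midpoint used for the deviation equal to prevBound.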
if math.IsInf(bound, 1) {
bound = prevBound
} else {
bounds = append(bounds, bound)
}
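// Prometheus buckets are cumulative; convert to the per-bucket delta counts
// expected by the Distribution proto.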
val := d.values[i] - prevVal
// val should never be negative; a negative value most likely indicates a bug or a data race in the scraped
// metrics endpoint.
// It's a possible cause of the zero-count issue below, so we catch it here early.
if val < 0 {
prometheusSamplesDiscarded.WithLabelValues("negative-bucket-count").Add(float64(d.inputSampleCount()))
err := fmt.Errorf("invalid bucket with negative count %s: count=%f, sum=%f, dev=%f, index=%d, bucketVal=%d, bucketPrevVal=%d",
lset, d.count, d.sum, dev, i, d.values[i], prevVal)
return nil, err
}
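// Approximate this bucket's contribution to the sum of squared deviations
// using the bucket midpoint as the representative value.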
x := (prevBound + bound) / 2
dev += float64(val) * (x - mean) * (x - mean)
prevBound = bound
prevVal = d.values[i]
values = append(values, val)
}
// Catch distributions which would be rejected by the CreateTimeSeries API and could
// make the entire batch fail.
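// An empty bounds slice means no finite bucket boundary was seen (e.g. only a
// +Inf bucket was exposed), which cannot be expressed as explicit buckets.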
if len(bounds) == 0 {
prometheusSamplesDiscarded.WithLabelValues("zero-buckets-bounds").Add(float64(d.inputSampleCount()))
return nil, nil
}
// Deviation and mean must be 0 if count is 0. We've received reports of samples with a negative
// deviation and 0 count being sent.
// Return an error to allow debugging this, as it shouldn't happen under normal circumstances:
// deviation can only become negative if one histogram bucket has a lower value than the previous
// one, which violates the histogram invariant.
if d.count == 0 && (mean != 0 || dev != 0) {
prometheusSamplesDiscarded.WithLabelValues("zero-count-violation").Add(float64(d.inputSampleCount()))
err := fmt.Errorf("invalid histogram with 0 count for %s: count=%f, sum=%f, dev=%f",
lset, d.count, d.sum, dev)
return nil, err
}
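// Assemble the final Distribution proto. The explicit bounds describe the finite
// bucket boundaries; the trailing entry of BucketCounts covers the +Inf bucket.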
dp := &distribution_pb.Distribution{
Count: int64(d.count),
Mean: mean,
SumOfSquaredDeviation: dev,
BucketOptions: &distribution_pb.Distribution_BucketOptions{
Options: &distribution_pb.Distribution_BucketOptions_ExplicitBuckets{
ExplicitBuckets: &distribution_pb.Distribution_BucketOptions_Explicit{
Bounds: bounds,
},
},
},
BucketCounts: values,
Exemplars: buildExemplars(d.exemplars),
}
return dp, nil
}