in pkg/export/transform.go [275:372]
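// build converts the cumulative Prometheus-style buckets collected in d into a
// Cloud Monitoring Distribution proto with explicit bounds and per-bucket counts.
//
// Illustrative sketch with hypothetical values (not taken from any real series):
// bounds [0.1, 0.5, 1, +Inf] with cumulative counts [2, 5, 5, 7] become explicit
// Bounds [0.1, 0.5, 1] and BucketCounts [2, 3, 0, 2] with Count 7, since the +Inf
// bound is dropped and its count becomes the trailing overflow bucket.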
func (d *distribution) build(lset labels.Labels) (*distribution_pb.Distribution, error) {
// The exposition format in general requires buckets to be in order, but we observed
// some cases in the wild where this was not the case.
// Sort here to gracefully handle most of those cases. This cannot handle
// all of them. Specifically, if buckets are out of order, distribution.complete() may
// return true before all buckets have been read. Then we will send a distribution
// with only a subset of buckets.
sort.Sort(d)
// Populate new values and bounds slices for the final proto as d will be returned to
// the memory pool while the proto will be enqueued for sending.
var (
bounds = make([]float64, 0, len(d.bounds))
values = make([]int64, 0, len(d.values))
prevBound, dev, mean float64
prevVal int64
)
// Some client libraries have race conditions causing a mismatch in counts across buckets and the count
// series. The most common case seems to be the count series mismatching while the buckets are consistent.
// We handle this here by always picking the +Inf bucket value.
// This helps ingest samples that would otherwise be dropped.
d.count = float64(d.values[len(d.bounds)-1])
// In principle, the count and sum series could be NaN.
// For the sum series this has been observed in the wild.
// As NaN is not a permitted mean value in Cloud Monitoring, we leave the mean at the default 0 in this case.
// For the count we override it with the +Inf bucket value above anyway and thus don't need special handling.
if !math.IsNaN(d.sum) && d.count > 0 {
mean = d.sum / d.count
}
for i, bound := range d.bounds {
if i > 0 && prevBound == bound {
// Each bound has to be higher than the previous one.
// Rarely, duplicate bounds can occur due to string-to-float imprecision
// or different representations of the same float, e.g. 1 vs 1.0.
// The GCM API rejects those, so reject them early.
prometheusSamplesDiscarded.WithLabelValues("duplicate-bucket-boundary").Add(float64(d.inputSampleCount()))
err := fmt.Errorf("invalid histogram with duplicates bounds (le label value) %s: count=%f, sum=%f, dev=%f, index=%d, bucketBound=%f, bucketPrevBound=%f",
lset, d.count, d.sum, dev, i, bound, prevBound)
return nil, err
}
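// The +Inf bound is not part of the explicit bounds; its (delta) count is still
// appended below and becomes the trailing overflow bucket. Reusing the previous
// bound makes the midpoint used for the deviation equal to prevBound.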
if math.IsInf(bound, 1) {
bound = prevBound
} else {
bounds = append(bounds, bound)
}
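// Prometheus buckets are cumulative; convert to the per-bucket delta counts
// expected by the Distribution proto.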
val := d.values[i] - prevVal
// val should never be negative; a negative value most likely indicates a bug or a data race in the scraped
// metrics endpoint.
// It's a possible cause of the zero-count issue below, so we catch it here early.
if val < 0 {
prometheusSamplesDiscarded.WithLabelValues("negative-bucket-count").Add(float64(d.inputSampleCount()))
err := fmt.Errorf("invalid bucket with negative count %s: count=%f, sum=%f, dev=%f, index=%d, bucketVal=%d, bucketPrevVal=%d",
lset, d.count, d.sum, dev, i, d.values[i], prevVal)
return nil, err
}
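// Approximate this bucket's contribution to the sum of squared deviations
// using the bucket midpoint as the representative value.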
x := (prevBound + bound) / 2
dev += float64(val) * (x - mean) * (x - mean)
prevBound = bound
prevVal = d.values[i]
values = append(values, val)
}
// Catch distributions which would be rejected by the CreateTimeSeries API and could
// make the entire batch fail.
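// An empty bounds slice means no finite bucket boundary was seen (e.g. only a
// +Inf bucket was exposed), which cannot be expressed as explicit buckets.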
if len(bounds) == 0 {
prometheusSamplesDiscarded.WithLabelValues("zero-buckets-bounds").Add(float64(d.inputSampleCount()))
return nil, nil
}
// Deviation and mean must be 0 if count is 0. We've received reports of samples with a negative
// deviation and 0 count being sent.
// Return an error to allow debugging this, as it shouldn't happen under normal circumstances:
// deviation can only become negative if one histogram bucket has a lower value than the previous
// one, which violates the histogram invariant.
if d.count == 0 && (mean != 0 || dev != 0) {
prometheusSamplesDiscarded.WithLabelValues("zero-count-violation").Add(float64(d.inputSampleCount()))
err := fmt.Errorf("invalid histogram with 0 count for %s: count=%f, sum=%f, dev=%f",
lset, d.count, d.sum, dev)
return nil, err
}
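// Assemble the final Distribution proto. The explicit bounds describe the finite
// bucket boundaries; the trailing entry of BucketCounts covers the +Inf bucket.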
dp := &distribution_pb.Distribution{
Count: int64(d.count),
Mean: mean,
SumOfSquaredDeviation: dev,
BucketOptions: &distribution_pb.Distribution_BucketOptions{
Options: &distribution_pb.Distribution_BucketOptions_ExplicitBuckets{
ExplicitBuckets: &distribution_pb.Distribution_BucketOptions_Explicit{
Bounds: bounds,
},
},
},
BucketCounts: values,
Exemplars: buildExemplars(d.exemplars),
}
return dp, nil
}