func MannWhitneyUTest()

in internal/stats/utest.go [125:231]


// MannWhitneyUTest performs a Mann-Whitney U-test on the samples x1
// and x2 against the alternative hypothesis alt.
//
// The inputs are copied before sorting, so the caller's slices are
// not modified. The returned result reports the sample sizes, the U
// statistic computed for x1, the alternative hypothesis, and the
// p-value.
//
// When both samples are small enough (at most MannWhitneyExactLimit
// values each without ties, or MannWhitneyTiesExactLimit with ties)
// the p-value comes from the exact U distribution; otherwise a normal
// approximation with tie and continuity correction is used.
//
// It returns ErrSampleSize if either sample is empty, ErrSampleNaN if
// either sample contains a NaN, and ErrSamplesEqual if every value in
// both samples is identical.
//
// An alt value other than LocationDiffers, LocationLess, or
// LocationGreater is not rejected: no branch assigns the p-value, so
// the result carries P == 0.
func MannWhitneyUTest(x1, x2 []float64, alt LocationHypothesis) (*MannWhitneyUTestResult, error) {
	n1, n2 := len(x1), len(x2)
	if n1 == 0 || n2 == 0 {
		return nil, ErrSampleSize
	}

	// NaN has no rank: it is unordered and compares unequal even to
	// itself, so the tie-grouping loop below would never advance past
	// it. Reject it up front rather than loop forever.
	for _, xs := range [][]float64{x1, x2} {
		for _, x := range xs {
			if math.IsNaN(x) {
				return nil, ErrSampleNaN
			}
		}
	}

	// Compute the U statistic and tie vector T. Sort private copies
	// so the caller's slices are left untouched.
	x1 = append([]float64(nil), x1...)
	x2 = append([]float64(nil), x2...)
	sort.Float64s(x1)
	sort.Float64s(x2)
	merged, labels := labeledMerge(x1, x2)

	// R1 is the sum of the ranks of the x1 samples. T records the
	// size of each group of equal values, in merged order.
	R1 := 0.0
	T, hasTies := make([]int, 0, len(merged)), false
	for i := 0; i < len(merged); {
		rank1, nx1, v1 := i+1, 0, merged[i]
		// Consume samples that tie this sample (including itself),
		// counting how many of them came from x1.
		for ; i < len(merged) && merged[i] == v1; i++ {
			if labels[i] == 1 {
				nx1++
			}
		}
		// The group occupies ranks rank1..i (merged[0] has rank 1).
		// Assign every member the average of those ranks.
		if nx1 != 0 {
			rank := float64(i+rank1) / 2
			R1 += rank * float64(nx1)
		}
		T = append(T, i-rank1+1)
		if i > rank1 {
			// More than one sample shared this value.
			hasTies = true
		}
	}
	U1 := R1 - float64(n1*(n1+1))/2

	// Compute the smaller of U1 and U2.
	U2 := float64(n1*n2) - U1
	Usmall := math.Min(U1, U2)

	var p float64
	if !hasTies && n1 <= MannWhitneyExactLimit && n2 <= MannWhitneyExactLimit ||
		hasTies && n1 <= MannWhitneyTiesExactLimit && n2 <= MannWhitneyTiesExactLimit {
		// Use exact U distribution. Without ties U1 is an integer;
		// with ties the averaged ranks can make it a multiple of 0.5.
		if len(T) == 1 {
			// All values are equal. Test is meaningless.
			return nil, ErrSamplesEqual
		}

		dist := UDist{N1: n1, N2: n2, T: T}
		switch alt {
		case LocationDiffers:
			if U1 == U2 {
				// The distribution is symmetric about Usmall.
				// Since the distribution is discrete, simply
				// doubling CDF(Usmall) would double count the
				// probability mass at Usmall itself. The two
				// tails together cover the whole distribution,
				// so the p-value is 1.
				p = 1
			} else {
				p = dist.CDF(Usmall) * 2
			}

		case LocationLess:
			p = dist.CDF(U1)

		case LocationGreater:
			// NOTE(review): U1-1 steps back a whole unit. If
			// UDist.CDF resolves half-integer U under ties, this
			// may also count mass at U1-0.5 — confirm against
			// UDist.
			p = 1 - dist.CDF(U1-1)
		}
	} else {
		// Use normal approximation (with tie and continuity
		// correction).
		t := tieCorrection(T)
		N := float64(n1 + n2)
		mean := float64(n1*n2) / 2
		stddev := math.Sqrt(float64(n1*n2) * ((N + 1) - t/(N*(N-1))) / 12)
		if stddev == 0 {
			return nil, ErrSamplesEqual
		}
		numer := U1 - mean
		// Shift the numerator by half a unit (continuity
		// correction) in the direction appropriate to the
		// alternative, then read the p-value off the standard
		// normal distribution.
		switch alt {
		case LocationDiffers:
			z := (numer - mathSign(numer)*0.5) / stddev
			p = 2 * math.Min(StdNormal.CDF(z), 1-StdNormal.CDF(z))
		case LocationLess:
			z := (numer + 0.5) / stddev
			p = StdNormal.CDF(z)
		case LocationGreater:
			z := (numer - 0.5) / stddev
			p = 1 - StdNormal.CDF(z)
		}
	}

	return &MannWhitneyUTestResult{N1: n1, N2: n2, U: U1,
		AltHypothesis: alt, P: p}, nil
}

// ErrSampleNaN is returned by MannWhitneyUTest when a sample contains
// a NaN value, which cannot be ranked.
var ErrSampleNaN error = sampleNaNError{}

// sampleNaNError is the concrete type behind ErrSampleNaN.
type sampleNaNError struct{}

// Error implements the error interface.
func (sampleNaNError) Error() string { return "sample contains NaN" }