in internal/stats/utest.go [125:231]
func MannWhitneyUTest(x1, x2 []float64, alt LocationHypothesis) (*MannWhitneyUTestResult, error) {
n1, n2 := len(x1), len(x2)
if n1 == 0 || n2 == 0 {
return nil, ErrSampleSize
}
// Compute the U statistic and tie vector T.
x1 = append([]float64(nil), x1...)
x2 = append([]float64(nil), x2...)
sort.Float64s(x1)
sort.Float64s(x2)
merged, labels := labeledMerge(x1, x2)
R1 := 0.0
T, hasTies := []int{}, false
for i := 0; i < len(merged); {
rank1, nx1, v1 := i+1, 0, merged[i]
// Consume samples that tie this sample (including itself).
for ; i < len(merged) && merged[i] == v1; i++ {
if labels[i] == 1 {
nx1++
}
}
// Assign all tied samples the average rank of the
// samples, where merged[0] has rank 1.
if nx1 != 0 {
rank := float64(i+rank1) / 2
R1 += rank * float64(nx1)
}
T = append(T, i-rank1+1)
if i > rank1 {
hasTies = true
}
}
U1 := R1 - float64(n1*(n1+1))/2
// Compute the smaller of U1 and U2
U2 := float64(n1*n2) - U1
Usmall := math.Min(U1, U2)
var p float64
if !hasTies && n1 <= MannWhitneyExactLimit && n2 <= MannWhitneyExactLimit ||
hasTies && n1 <= MannWhitneyTiesExactLimit && n2 <= MannWhitneyTiesExactLimit {
// Use exact U distribution. U1 will be an integer.
if len(T) == 1 {
// All values are equal. Test is meaningless.
return nil, ErrSamplesEqual
}
dist := UDist{N1: n1, N2: n2, T: T}
switch alt {
case LocationDiffers:
if U1 == U2 {
// The distribution is symmetric about
// Usmall. Since the distribution is
// discrete, the CDF is discontinuous
// and if simply double CDF(Usmall),
// we'll double count the
// (non-infinitesimal) probability
// mass at Usmall. What we want is
// just the integral of the whole CDF,
// which is 1.
p = 1
} else {
p = dist.CDF(Usmall) * 2
}
case LocationLess:
p = dist.CDF(U1)
case LocationGreater:
p = 1 - dist.CDF(U1-1)
}
} else {
// Use normal approximation (with tie and continuity
// correction).
t := tieCorrection(T)
N := float64(n1 + n2)
μ_U := float64(n1*n2) / 2
σ_U := math.Sqrt(float64(n1*n2) * ((N + 1) - t/(N*(N-1))) / 12)
if σ_U == 0 {
return nil, ErrSamplesEqual
}
numer := U1 - μ_U
// Perform continuity correction.
switch alt {
case LocationDiffers:
numer -= mathSign(numer) * 0.5
case LocationLess:
numer += 0.5
case LocationGreater:
numer -= 0.5
}
z := numer / σ_U
switch alt {
case LocationDiffers:
p = 2 * math.Min(StdNormal.CDF(z), 1-StdNormal.CDF(z))
case LocationLess:
p = StdNormal.CDF(z)
case LocationGreater:
p = 1 - StdNormal.CDF(z)
}
}
return &MannWhitneyUTestResult{N1: n1, N2: n2, U: U1,
AltHypothesis: alt, P: p}, nil
}