diff --git a/src/FSharp.Stats/FSharp.Stats.fsproj b/src/FSharp.Stats/FSharp.Stats.fsproj
index 633a4162..323b13c1 100644
--- a/src/FSharp.Stats/FSharp.Stats.fsproj
+++ b/src/FSharp.Stats/FSharp.Stats.fsproj
@@ -128,6 +128,7 @@
+        <Compile Include="Testing\MannWhitney.fs" />
diff --git a/src/FSharp.Stats/Testing/MannWhitney.fs b/src/FSharp.Stats/Testing/MannWhitney.fs
new file mode 100644
index 00000000..61f98d06
--- /dev/null
+++ b/src/FSharp.Stats/Testing/MannWhitney.fs
@@ -0,0 +1,101 @@
+namespace FSharp.Stats.Testing
+
+/// <summary>
+/// Mann-Whitney U test (also known as the Wilcoxon rank-sum test) for two independent samples.
+/// Uses a normal approximation, which is reliable for combined sample sizes ≥ 20.
+/// </summary>
+module MannWhitneyTest =
+
+    open FSharp.Stats
+
+    /// Result of a Mann-Whitney U test for two independent samples.
+    type MannWhitneyTestStatistics =
+        {
+            /// U statistic for sample 1: number of pairs (x₁, x₂) where x₁ > x₂ (counting ties as 0.5).
+            U1: float
+            /// U statistic for sample 2: U1 + U2 = n₁ × n₂.
+            U2: float
+            /// Z-score from the normal approximation.
+            Statistic: float
+            /// One-sided p-value: P(sample 1 is stochastically less than sample 2).
+            PValueLeft: float
+            /// One-sided p-value: P(sample 1 is stochastically greater than sample 2).
+            PValueRight: float
+            /// Two-sided p-value.
+            PValue: float
+        }
+
+    /// <summary>
+    /// Creates a Mann-Whitney U test for two independent samples using a normal approximation.
+    /// </summary>
+    /// <param name="sample1">First independent sample.</param>
+    /// <param name="sample2">Second independent sample.</param>
+    /// <param name="continuityCorrection">
+    /// When true, applies a continuity correction (±0.5) to the z-score. Recommended for small samples.
+    /// Default: true.
+    /// </param>
+    /// <remarks>
+    /// The test is reliable for combined sample sizes ≥ 20. For smaller samples, exact p-values
+    /// (not yet implemented) should be used instead of the normal approximation.
+    ///
+    /// Ties are handled via the standard variance correction.
+    ///
+    /// A large U1 (close to n₁×n₂) indicates that sample 1 tends to have larger values than sample 2,
+    /// yielding a small PValueRight. Conversely, a small U1 yields a small PValueLeft.
+    /// </remarks>
+    let create (sample1: seq<float>) (sample2: seq<float>) (continuityCorrection: bool) : MannWhitneyTestStatistics =
+        let arr1 = Seq.toArray sample1
+        let arr2 = Seq.toArray sample2
+        let n1 = float arr1.Length
+        let n2 = float arr2.Length
+        let bigN = n1 + n2
+
+        // Rank combined array; first n1 entries correspond to sample1
+        let combined = Array.append arr1 arr2
+        let ranks = Rank.RankAverage() combined
+
+        // Sum of ranks for sample 1
+        let r1 = Array.sumBy (fun i -> ranks.[i]) [| 0 .. arr1.Length - 1 |]
+
+        // U statistics
+        let u1 = r1 - n1 * (n1 + 1.) / 2.
+        let u2 = n1 * n2 - u1
+
+        // Variance with tie correction: tied groups share an average rank, so
+        // countBy on rank values recovers each tie-group size t, contributing t³ - t.
+        let tieCorrection =
+            ranks
+            |> Array.countBy id
+            |> Array.sumBy (fun (_, cnt) ->
+                let t = float cnt
+                t * t * t - t)
+
+        let mu = n1 * n2 / 2.
+        let var =
+            if bigN <= 1. then 0.
+            else (n1 * n2 / (bigN * (bigN - 1.))) * ((bigN * bigN * bigN - bigN - tieCorrection) / 12.)
+
+        let sigma = sqrt var
+
+        // Z-score using U1 (positive z → sample1 tends to be larger).
+        // The continuity correction always shrinks |z| toward zero.
+        let z =
+            if sigma = 0. then 0.
+            else
+                let cc =
+                    if not continuityCorrection then 0.
+                    elif u1 > mu then -0.5
+                    elif u1 < mu then 0.5
+                    else 0.
+                (u1 - mu + cc) / sigma
+
+        let pLeft = Distributions.Continuous.Normal.CDF 0. 1. z
+        let pRight = 1. - pLeft
+        let pTwo = 2. * (min pLeft pRight)
+
+        {
+            U1 = u1
+            U2 = u2
+            Statistic = z
+            PValueLeft = pLeft
+            PValueRight = pRight
+            PValue = pTwo
+        }
diff --git a/tests/FSharp.Stats.Tests/Testing.fs b/tests/FSharp.Stats.Tests/Testing.fs
index 1adfd493..0bc2dca4 100644
--- a/tests/FSharp.Stats.Tests/Testing.fs
+++ b/tests/FSharp.Stats.Tests/Testing.fs
@@ -1455,4 +1455,62 @@ let anovaTests =
         // Expect.floatClose Accuracy.low (Math.Round (twoWayANOVARandom.Total.SumOfSquares,8)) 0.7815 "Total.SumOfSquares deviated from expected value"
         // Expect.floatClose Accuracy.low (Math.Round (twoWayANOVARandom.Total.MeanSquares,8)) 0.04597 "Total.MeanSquares deviated from expected value"
-    ]
\ No newline at end of file
+    ]
+
+[<Tests>]
+let testMannWhitneyTest =
+    // Reference values cross-checked against R: wilcox.test(..., exact=FALSE, correct=FALSE)
+    // and R: wilcox.test(..., exact=FALSE, correct=TRUE)
+
+    // Test case 1: clear separation (sample1 >> sample2) -- no ties
+    // R: wilcox.test(c(6,7,8,9,10), c(1,2,3,4,5), exact=FALSE, correct=FALSE) → W=25, p=0.004516
+    // R: wilcox.test(c(6,7,8,9,10), c(1,2,3,4,5), exact=FALSE, correct=TRUE) → W=25, p=0.01220
+    let s1a = [| 6.; 7.; 8.; 9.; 10. |]
+    let s2a = [| 1.; 2.; 3.; 4.; 5. |]
+    let resultNoCC = MannWhitneyTest.create s1a s2a false
+    let resultCC = MannWhitneyTest.create s1a s2a true
+
+    // Test case 2: moderate overlap -- no ties
+    // sample1=[2;4;6;8;10], sample2=[1;3;5;7;9]
+    // U1=15, U2=10; z(no cc)=(15-12.5)/4.787=0.5222; pRight=0.3008; pTwo=0.6016
+    let s1b = [| 2.; 4.; 6.; 8.; 10. |]
+    let s2b = [| 1.; 3.; 5.; 7.; 9. |]
+    let resultB = MannWhitneyTest.create s1b s2b false
+
+    // Test case 3: data with ties
+    // sample1=[1;2;3;4], sample2=[2;3;5;6]
+    // U1=4, U2=12, mu=8, var=11.714, sigma=3.4225
+    // z=-1.1693, pLeft=0.1212, pRight=0.8788, pTwo=0.2423
+    let s1c = [| 1.; 2.; 3.; 4. |]
+    let s2c = [| 2.; 3.; 5.; 6. |]
+    let resultC = MannWhitneyTest.create s1c s2c false
+
+    testList "Testing.MannWhitneyTest" [
+        testCase "U statistics - clear separation" <| fun () ->
+            Expect.floatClose Accuracy.medium resultNoCC.U1 25. "U1 should be 25 (sample1 dominates)"
+            Expect.floatClose Accuracy.medium resultNoCC.U2 0. "U2 should be 0"
+        testCase "U1 + U2 = n1*n2" <| fun () ->
+            Expect.floatClose Accuracy.medium (resultNoCC.U1 + resultNoCC.U2) 25. "U1+U2 must equal n1*n2=25"
+            Expect.floatClose Accuracy.medium (resultB.U1 + resultB.U2) 25. "U1+U2 must equal n1*n2=25"
+            Expect.floatClose Accuracy.medium (resultC.U1 + resultC.U2) 16. "U1+U2 must equal n1*n2=16"
+        testCase "p-values - clear separation, no continuity correction" <| fun () ->
+            // R: p-value = 0.004516
+            Expect.isTrue (resultNoCC.PValueRight < 0.01) "PValueRight should be < 0.01 (sample1 >> sample2)"
+            Expect.isTrue (resultNoCC.PValueLeft > 0.99) "PValueLeft should be > 0.99"
+            Expect.floatClose Accuracy.low resultNoCC.PValue (2. * resultNoCC.PValueRight) "Two-sided = 2 × one-sided min"
+        testCase "p-values - clear separation, continuity correction" <| fun () ->
+            // R: p-value ≈ 0.01220
+            Expect.floatClose Accuracy.low (Math.Round(resultCC.PValue, 4)) 0.0122 "PValue with CC should ≈ 0.0122"
+        testCase "U statistics - moderate overlap" <| fun () ->
+            Expect.floatClose Accuracy.medium resultB.U1 15. "U1 should be 15"
+            Expect.floatClose Accuracy.medium resultB.U2 10. "U2 should be 10"
+        testCase "p-value not significant - moderate overlap" <| fun () ->
+            Expect.isTrue (resultB.PValue > 0.05) "p-value should not be significant for nearly identical distributions"
+        testCase "U statistics with ties" <| fun () ->
+            Expect.floatClose Accuracy.medium resultC.U1 4. "U1 should be 4 (with ties)"
+            Expect.floatClose Accuracy.medium resultC.U2 12. "U2 should be 12"
+        testCase "p-values with ties" <| fun () ->
+            // z ≈ -1.169, pLeft ≈ 0.121, pTwo ≈ 0.242
+            Expect.floatClose Accuracy.low (Math.Round(resultC.PValueLeft, 3)) 0.121 "PValueLeft with ties should ≈ 0.121"
+            Expect.floatClose Accuracy.low (Math.Round(resultC.PValue, 3)) 0.243 "PValueTwoTailed with ties should ≈ 0.243"
+    ]