diff --git a/src/FSharp.Stats/FSharp.Stats.fsproj b/src/FSharp.Stats/FSharp.Stats.fsproj
index 633a4162..323b13c1 100644
--- a/src/FSharp.Stats/FSharp.Stats.fsproj
+++ b/src/FSharp.Stats/FSharp.Stats.fsproj
@@ -128,6 +128,7 @@
+    <Compile Include="Testing\MannWhitney.fs" />
diff --git a/src/FSharp.Stats/Testing/MannWhitney.fs b/src/FSharp.Stats/Testing/MannWhitney.fs
new file mode 100644
index 00000000..61f98d06
--- /dev/null
+++ b/src/FSharp.Stats/Testing/MannWhitney.fs
@@ -0,0 +1,101 @@
+namespace FSharp.Stats.Testing
+
+/// <summary>
+/// Mann-Whitney U test (also known as the Wilcoxon rank-sum test) for two independent samples.
+/// Uses a normal approximation, which is reliable for combined sample sizes ≥ 20.
+/// </summary>
+module MannWhitneyTest =
+
+ open FSharp.Stats
+
+ /// Result of a Mann-Whitney U test for two independent samples.
+ type MannWhitneyTestStatistics =
+ {
+ /// U statistic for sample 1: number of pairs (x₁, x₂) where x₁ > x₂ (counting ties as 0.5).
+ U1: float
+ /// U statistic for sample 2: U1 + U2 = n₁ × n₂.
+ U2: float
+ /// Z-score from the normal approximation.
+ Statistic: float
+ /// One-sided p-value: P(sample 1 is stochastically less than sample 2).
+ PValueLeft: float
+ /// One-sided p-value: P(sample 1 is stochastically greater than sample 2).
+ PValueRight: float
+ /// Two-sided p-value.
+ PValue: float
+ }
+
+    /// <summary>
+    /// Creates a Mann-Whitney U test for two independent samples using a normal approximation.
+    /// </summary>
+    /// <param name="sample1">First independent sample.</param>
+    /// <param name="sample2">Second independent sample.</param>
+    /// <param name="continuityCorrection">
+    /// When true, applies a continuity correction (±0.5) to the z-score. Recommended for small samples.
+    /// Default: true.
+    /// </param>
+    /// <remarks>
+    /// The test is reliable for combined sample sizes ≥ 20. For smaller samples, exact p-values
+    /// (not yet implemented) should be used instead of the normal approximation.
+    ///
+    /// Ties are handled via the standard variance correction.
+    ///
+    /// A large U1 (close to n₁×n₂) indicates that sample 1 tends to have larger values than sample 2,
+    /// yielding a small PValueRight. Conversely, a small U1 yields a small PValueLeft.
+    /// </remarks>
+    let create (sample1: seq<float>) (sample2: seq<float>) (continuityCorrection: bool) : MannWhitneyTestStatistics =
+ let arr1 = Seq.toArray sample1
+ let arr2 = Seq.toArray sample2
+ let n1 = float arr1.Length
+ let n2 = float arr2.Length
+ let bigN = n1 + n2
+
+ // Rank combined array; first n1 entries correspond to sample1
+ let combined = Array.append arr1 arr2
+ let ranks = Rank.RankAverage() combined
+
+ // Sum of ranks for sample 1
+ let r1 = Array.sumBy (fun i -> ranks.[i]) [| 0 .. arr1.Length - 1 |]
+
+ // U statistics
+ let u1 = r1 - n1 * (n1 + 1.) / 2.
+ let u2 = n1 * n2 - u1
+
+ // Variance with tie correction
+ let tieCorrection =
+ ranks
+ |> Array.countBy id
+ |> Array.sumBy (fun (_, cnt) ->
+ let t = float cnt
+ t * t * t - t)
+
+ let mu = n1 * n2 / 2.
+ let var =
+ if bigN <= 1. then 0.
+ else (n1 * n2 / (bigN * (bigN - 1.))) * ((bigN * bigN * bigN - bigN - tieCorrection) / 12.)
+
+ let sigma = sqrt var
+
+ // Z-score using U1 (positive z → sample1 tends to be larger)
+ let z =
+ if sigma = 0. then 0.
+ else
+ let cc =
+ if not continuityCorrection then 0.
+ elif u1 > mu then -0.5
+ elif u1 < mu then 0.5
+ else 0.
+ (u1 - mu + cc) / sigma
+
+ let pLeft = Distributions.Continuous.Normal.CDF 0. 1. z
+ let pRight = 1. - pLeft
+ let pTwo = 2. * (min pLeft pRight)
+
+ {
+ U1 = u1
+ U2 = u2
+ Statistic = z
+ PValueLeft = pLeft
+ PValueRight = pRight
+ PValue = pTwo
+ }
diff --git a/tests/FSharp.Stats.Tests/Testing.fs b/tests/FSharp.Stats.Tests/Testing.fs
index 1adfd493..0bc2dca4 100644
--- a/tests/FSharp.Stats.Tests/Testing.fs
+++ b/tests/FSharp.Stats.Tests/Testing.fs
@@ -1455,4 +1455,62 @@ let anovaTests =
// Expect.floatClose Accuracy.low (Math.Round (twoWayANOVARandom.Total.SumOfSquares,8)) 0.7815 "Total.SumOfSquares deviated from expected value"
// Expect.floatClose Accuracy.low (Math.Round (twoWayANOVARandom.Total.MeanSquares,8)) 0.04597 "Total.MeanSquares deviated from expected value"
- ]
\ No newline at end of file
+ ]
+
+[<Tests>]
+let testMannWhitneyTest =
+ // Reference values cross-checked against R: wilcox.test(..., exact=FALSE, correct=FALSE)
+ // and R: wilcox.test(..., exact=FALSE, correct=TRUE)
+
+ // Test case 1: clear separation (sample1 >> sample2) -- no ties
+ // R: wilcox.test(c(6,7,8,9,10), c(1,2,3,4,5), exact=FALSE, correct=FALSE) → W=25, p=0.004516
+ // R: wilcox.test(c(6,7,8,9,10), c(1,2,3,4,5), exact=FALSE, correct=TRUE) → W=25, p=0.01220
+ let s1a = [| 6.; 7.; 8.; 9.; 10. |]
+ let s2a = [| 1.; 2.; 3.; 4.; 5. |]
+ let resultNoCC = MannWhitneyTest.create s1a s2a false
+ let resultCC = MannWhitneyTest.create s1a s2a true
+
+ // Test case 2: moderate overlap -- no ties
+ // sample1=[2;4;6;8;10], sample2=[1;3;5;7;9]
+ // U1=15, U2=10; z(no cc)=(15-12.5)/4.787=0.5222; pRight=0.3008; pTwo=0.6016
+ let s1b = [| 2.; 4.; 6.; 8.; 10. |]
+ let s2b = [| 1.; 3.; 5.; 7.; 9. |]
+ let resultB = MannWhitneyTest.create s1b s2b false
+
+ // Test case 3: data with ties
+ // sample1=[1;2;3;4], sample2=[2;3;5;6]
+ // U1=4, U2=12, mu=8, var=11.714, sigma=3.4225
+ // z=-1.1693, pLeft=0.1212, pRight=0.8788, pTwo=0.2423
+ let s1c = [| 1.; 2.; 3.; 4. |]
+ let s2c = [| 2.; 3.; 5.; 6. |]
+ let resultC = MannWhitneyTest.create s1c s2c false
+
+ testList "Testing.MannWhitneyTest" [
+ testCase "U statistics - clear separation" <| fun () ->
+ Expect.floatClose Accuracy.medium resultNoCC.U1 25. "U1 should be 25 (sample1 dominates)"
+ Expect.floatClose Accuracy.medium resultNoCC.U2 0. "U2 should be 0"
+ testCase "U1 + U2 = n1*n2" <| fun () ->
+ Expect.floatClose Accuracy.medium (resultNoCC.U1 + resultNoCC.U2) 25. "U1+U2 must equal n1*n2=25"
+ Expect.floatClose Accuracy.medium (resultB.U1 + resultB.U2) 25. "U1+U2 must equal n1*n2=25"
+ Expect.floatClose Accuracy.medium (resultC.U1 + resultC.U2) 16. "U1+U2 must equal n1*n2=16"
+ testCase "p-values - clear separation, no continuity correction" <| fun () ->
+ // R: p-value = 0.004516
+ Expect.isTrue (resultNoCC.PValueRight < 0.01) "PValueRight should be < 0.01 (sample1 >> sample2)"
+ Expect.isTrue (resultNoCC.PValueLeft > 0.99) "PValueLeft should be > 0.99"
+ Expect.floatClose Accuracy.low resultNoCC.PValue (2. * resultNoCC.PValueRight) "Two-sided = 2 × one-sided min"
+ testCase "p-values - clear separation, continuity correction" <| fun () ->
+ // R: p-value ≈ 0.01220
+ Expect.floatClose Accuracy.low (Math.Round(resultCC.PValue, 4)) 0.0122 "PValue with CC should ≈ 0.0122"
+ testCase "U statistics - moderate overlap" <| fun () ->
+ Expect.floatClose Accuracy.medium resultB.U1 15. "U1 should be 15"
+ Expect.floatClose Accuracy.medium resultB.U2 10. "U2 should be 10"
+ testCase "p-value not significant - moderate overlap" <| fun () ->
+ Expect.isTrue (resultB.PValue > 0.05) "p-value should not be significant for nearly identical distributions"
+ testCase "U statistics with ties" <| fun () ->
+ Expect.floatClose Accuracy.medium resultC.U1 4. "U1 should be 4 (with ties)"
+ Expect.floatClose Accuracy.medium resultC.U2 12. "U2 should be 12"
+ testCase "p-values with ties" <| fun () ->
+ // z ≈ -1.169, pLeft ≈ 0.121, pTwo ≈ 0.242
+ Expect.floatClose Accuracy.low (Math.Round(resultC.PValueLeft, 3)) 0.121 "PValueLeft with ties should ≈ 0.121"
+ Expect.floatClose Accuracy.low (Math.Round(resultC.PValue, 3)) 0.243 "PValueTwoTailed with ties should ≈ 0.243"
+ ]