FSharp.Stats/tests/FSharp.Stats.Tests/Testing.fs at 619442421542fc6c8e16612c7fd422716d325f78 · fslaborg/FSharp.Stats · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
module TestingTests
open Expecto
open System
open FSharp.Stats.Testing
open FSharp.Stats

open System.IO
open System.Reflection

/// The assembly that contains this module; embedded resources are looked up in it.
let assembly = Assembly.GetExecutingAssembly()

/// Manifest names of all resources embedded in `assembly`.
let resnames = assembly.GetManifestResourceNames()

/// Reads the first embedded resource whose manifest name contains `name`
/// and returns its complete content decoded as UTF-8.
/// Fails with an exception that names the requested resource when no manifest name matches.
let readEmbeddedRessource (name:string) =
    match resnames |> Array.tryFind (fun (r:string) -> r.Contains(name)) with
    | Some path ->
        use stream = assembly.GetManifestResourceStream(path)
        use reader = new StreamReader(stream, encoding=Text.Encoding.UTF8)
        reader.ReadToEnd()
    | None ->
        // name the resource so a missing or renamed fixture can be identified from the failure alone
        failwithf "could not load embedded resource '%s', check package integrity" name

/// Reads the embedded resource matching `path` and parses it into (key, value) pairs.
/// The first line is treated as a header and skipped. Every remaining non-blank line must
/// consist of exactly two fields separated by ", "; the second field is converted with `float`.
/// Fails with a message naming the offending line when a line does not have that shape.
let readCsv path =
    let content = readEmbeddedRessource path
    content.Replace("\r\n","\n").Split("\n")
    |> Array.skip 1
    // a trailing newline produces an empty last entry; skip it instead of failing on it
    |> Array.filter (fun line -> not (String.IsNullOrWhiteSpace line))
    |> Array.map (fun line ->
        match line.Split(", ") with
        | [|key; value|] -> key, float value
        | _ -> failwithf "malformed line in embedded csv '%s': '%s'" path line
     )

[<Tests>]
let testPostHocTests =
    //Tests taken from:
    //https://www.icalcu.com/stat/anova-tukey-hsd-calculator.html
    testList "Testing.PostHoc" [
        (*
        // Test omitted due to extremely long runtime of CodeCov.
        testCase "tukeyHSD" <| fun () ->
            let dataA = [|3.;3.;4.;5.;2.;5.;5.;4.;4.;2.;2.;2.;4.;3.;5.;3.;4.;5.;3.;5.;                   |]
            let dataB = [|10.;7.;9.;6.;7.;7.;6.;7.;10.;7.;8.;8.;8.;6.;10.;9.;9.;6.;9.;8.;                |]
            let dataC = [|6.;5.;6.;4.;4.;6.;1.;4.;6.;5.;4.;7.;4.;2.;1.;1.;3.;4.;5.;3.;                   |]
            let dataD = [|10.;5.;6.;5.;8.;5.;6.;9.;3.;10.;5.;9.;5.;5.;6.;10.;9.;6.;9.;10.;               |]
            let dataE = [|14.;17.;14.;13.;18.;12.;17.;11.;12.;11.;12.;10.;17.;19.;18.;18.;15.;14.;18.;16.|]

            let data = [|dataA;dataB;dataC;dataD;dataE|]

            let contrastMatrix =
                [|
                    //[|-1.;1.;0.;0.;0.;|] pvalue = zero
                    [|-1.;0.;1.;0.;0.;|]
                    [|-1.;0.;0.;1.;0.;|]
                    //[|-1.;0.;0.;0.;1.;|] pvalue = zero
                    [|0.;-1.;1.;0.;0.;|]
                    [|0.;-1.;0.;1.;0.;|]
                    //[|0.;-1.;0.;0.;1.;|] pvalue = zero
                    [|0.;0.;-1.;1.;0.;|]
                    //[|0.;0.;-1.;0.;1.;|] pvalue = zero
                    //[|0.;0.;0.;-1.;1.;|] pvalue = zero
                |]

            let pValues =
                PostHoc.tukeyHSD contrastMatrix data
                |> Array.map (fun x -> x.Significance)

            //pvalues from R: TUKEY <- TukeyHSD(x=ANOVA, 'data$treatment', conf.level=0.95)
            let rpval = [0.9685630;0.0000045;0.0000003;0.7072882;0.0000618]

            Expect.floatClose Accuracy.low rpval.[0] pValues.[0] "p values should be equal."
            Expect.floatClose Accuracy.low rpval.[1] pValues.[1] "p values should be equal."
            Expect.floatClose Accuracy.low rpval.[2] pValues.[2] "p values should be equal."
            Expect.floatClose Accuracy.low rpval.[3] pValues.[3] "p values should be equal."
            Expect.floatClose Accuracy.low rpval.[4] pValues.[4] "p values should be equal."
        *)
        testCase "dunnett" (fun () ->
            // five samples; the first one is the control
            let samples =
                [|
                    [|1.84;2.49;1.50;2.42;|]
                    [|2.43;1.85;2.42;2.73;|]
                    [|3.95;3.67;3.23;2.31;|]
                    [|3.21;3.20;2.32;3.30;|]
                    [|3.21;3.13;2.32;3.30;3.20;2.42;|]
                |]

            // each row contrasts the control (weight -1) with exactly one other sample (weight 1)
            let contrasts =
                [|
                    [|-1.;1.;0.;0.;0.|]
                    [|-1.;0.;1.;0.;0.|]
                    [|-1.;0.;0.;1.;0.|]
                    [|-1.;0.;0.;0.;1.|]
                |]

            let results =
                PostHoc.dunnetts contrasts samples Tables.dunnettsTwoSided095

            // reference values from: SPSS Dunnett's test version 27
            let expectedPValues   = [0.811;0.010;0.050;0.049]
            let expectedMeanDiffs = [0.295;1.2275;0.945;0.8675]

            // a contrast counts as significant when the reference p value is below 0.05
            for i in 0 .. 3 do
                Expect.equal results.[i].Significance (expectedPValues.[i]<0.05) "Significance should be equal."

            for i in 0 .. 3 do
                Expect.floatClose Accuracy.high results.[i].L expectedMeanDiffs.[i] "Mean differences should be equal."
        )
    ]

[<Tests>]
let hTestTests =
    // H-Test with ties tested against r implementation kruskal.test(weight ~ group, data = my_data)
    let groupA = [4.17; 5.18;  5.18;  6.11;  4.50;  4.61;  5.17;  4.53;  5.33;  5.18;]
    let groupB = [4.81; 4.17;  4.41;  3.59;  5.87;  3.83;  6.03;  4.89;  4.32;  4.69;]
    let groupC = [6.31; 5.12;  5.00;  5.00;  5.00;  5.29;  5.00;  6.15;  5.80;  5.26;]
    let samples = [groupA;groupB;groupC]

    // calculation of the H test
    let hResult =
        HTest.createHTest samples

    testList "Testing.HTest" [
        testCase "createHTest" <| fun () ->
            // Expect.equal (instead of Expect.isTrue on a comparison) reports the actual and the
            // expected value when the assertion fails; the pass/fail condition is unchanged.
            Expect.equal (Math.Round(hResult.PValueRight,5)) 0.03781 "pValue should be equal."
            Expect.equal (Math.Round(hResult.Statistic,4)) 6.5502 "statistic should be equal."

    ]

[<Tests>]
let friedmanTestTests =
    // Friedman-Test tested against dataset from https://www.methodenberatung.uzh.ch/de/datenanalyse_spss/unterschiede/zentral/friedman.html#3.2._Ergebnisse_des_Friedman-Tests and p-values obtained from distcalc and https://www.socscistatistics.com/pvalues/chidistribution.aspx
    let samples =
        seq {
            yield [|275.;273.;288.;273.;244.|]
            yield [|292.;283.;284.;285.;329.|]
            yield [|281.;274.;298.;270.;252.|]
            yield [|284.;275.;271.;272.;258.|]
            yield [|285.;294.;307.;278.;275.|]
            yield [|283.;279.;301.;276.;279.|]
            yield [|290.;265.;298.;291.;295.|]
            yield [|294.;277.;295.;290.;271.|]
            yield [|300.;304.;293.;279.;271.|]
            yield [|284.;297.;352.;292.;284.|]
        }

    // modified dataset from UZH for 3x equal ranks
    // (differs from the dataset above only in the first and the last row)
    let samples2 =
        seq {
            yield [|275.;273.;288.;273.;273.|]
            yield [|292.;283.;284.;285.;329.|]
            yield [|281.;274.;298.;270.;252.|]
            yield [|284.;275.;271.;272.;258.|]
            yield [|285.;294.;307.;278.;275.|]
            yield [|283.;279.;301.;276.;279.|]
            yield [|290.;265.;298.;291.;295.|]
            yield [|294.;277.;295.;290.;271.|]
            yield [|300.;304.;293.;279.;271.|]
            yield [|284.;297.;284.;292.;284.|]
        }

    // both results are computed once, before the test list is built
    let resultOriginal = FriedmanTest.createFriedmanTest samples
    let resultModified = FriedmanTest.createFriedmanTest samples2

    testList "Testing.FriedmanTest" [
        testCase "createFriedmanTest2equal" (fun () ->
            Expect.floatClose Accuracy.low resultOriginal.Statistic 13.259 "statistics should be equal."
            Expect.floatClose Accuracy.low resultOriginal.PValueRight 0.010077 "pValue should be equal."
        )
        testCase "createFriedmanTest3equal" (fun () ->
            Expect.floatClose Accuracy.low resultModified.Statistic 9.738 "statistics should be equal."
            Expect.floatClose Accuracy.low resultModified.PValueRight 0.04508 "pValue should be equal."
        )
        ]

[<Tests>]
let wilcoxonTestTests =
    // reference values obtained with SciPy Version 1.7.1
    let before = seq{78.;24.;64.;45.;64.;52.;30.;50.;64.;50.;78.;22.;84.;40.;90.;72.}
    let after = seq{78.;24.;62.;48.;68.;56.;25.;44.;56.;40.;68.;36.;68.;20.;58.;32.}
    let differences = seq{0.;0.;2.;-3.;-4.;-4.;5.;6.;8.;10.;10.;-14.;16.;20.;32.;40.}

    // the trailing bool switches the continuity correction on (true) or off (false)
    let pairedCorrected   = WilcoxonTest.createWilcoxonTest before after true
    let pairedUncorrected = WilcoxonTest.createWilcoxonTest before after false
    let diffsCorrected    = WilcoxonTest.createWilcoxonTestFromDifferences differences true
    let diffsUncorrected  = WilcoxonTest.createWilcoxonTestFromDifferences differences false

    testList "Testing.WilcoxonTest" [
        testCase "wilcoxonWithCorrection" (fun () ->
            Expect.floatClose Accuracy.low pairedCorrected.PValueTwoTailed 0.0382 "pValue should be equal."
        )
        testCase "wilcoxonWithoutCorrection" (fun () ->
            Expect.floatClose Accuracy.low pairedUncorrected.PValueTwoTailed 0.03537 "pValue should be equal."
        )
        testCase "wilcoxonDifferencesWithCorrection" (fun () ->
            Expect.floatClose Accuracy.low diffsCorrected.PValueTwoTailed 0.0382 "pValue should be equal."
        )
        testCase "wilcoxonDifferencesWithoutCorrection" (fun () ->
            Expect.floatClose Accuracy.low diffsUncorrected.PValueTwoTailed 0.03537 "pValue should be equal."
        )
        testCase "wilcoxonOneSidedWithCorrection" (fun () ->
            Expect.floatClose Accuracy.low pairedCorrected.PValueLeft 0.019102 "pValue should be equal"
        )
        testCase "wilcoxonOneSidedWithoutCorrection" (fun () ->
            Expect.floatClose Accuracy.low pairedUncorrected.PValueRight 0.9823 "pValue should be equal"
        )

        ]


[<Tests>]
let tTestTests =
    // reference values obtained with SPSS version 27
    let groupA = vector [-5.;-3.;-3.;-4.;-5.;]
    let groupB = vector [-2.;-4.;-4.;-6.;-6.;-6.;-5.;]
    // NOTE(review): groupC is not referenced by any test below
    let groupC = vector [-3.;-7.;-8.;-4.;-2.; 1.;-1.;]
    let groupD = vector [1.;-1.;0.;2.;2.;]

    // summary statistics in the order (mean, variance, sample size)
    let summaryA = groupA |> Seq.mean, groupA |> Seq.var, groupA |> Seq.length |> float
    let summaryB = groupB |> Seq.mean, groupB |> Seq.var, groupB |> Seq.length |> float

    // calculation of the t tests
    // flag = true; presumably the equal-variance variant (expected df 10 = 5 + 7 - 2) -- TODO confirm
    let fromSamples     = TTest.twoSample true groupA groupB
    let fromSummary     = TTest.twoSampleFromMeanAndVar true summaryA summaryB
    // flag = false; expected df is fractional (9.990)
    let fromSamplesAlt  = TTest.twoSample false groupA groupB
    let singleSample    = TTest.oneSample groupD 0.5

    testList "Testing.TTest" [
        testCase "twoSample" (fun () ->
            Expect.floatClose Accuracy.low fromSamples.PValue 0.377 "pValue should be equal."
            Expect.floatClose Accuracy.low fromSamples.DegreesOfFreedom 10. "df should be equal."
            Expect.floatClose Accuracy.low fromSamples.Statistic 0.924 "t statistic should be equal."
            Expect.floatClose Accuracy.low fromSamplesAlt.PValue 0.345 "pValue should be equal."
            Expect.floatClose Accuracy.low fromSamplesAlt.DegreesOfFreedom 9.990 "df should be equal."
        )
        testCase "twoSampleFromMeanAndVar" (fun () ->
            // raw samples and their summary statistics must lead to the very same result
            Expect.equal fromSamples fromSummary "results should be equal."
        )
        testCase "oneSample" (fun () ->
            Expect.floatClose Accuracy.low singleSample.PValue 0.634 "pValue should be equal."
            Expect.equal singleSample.DegreesOfFreedom 4. "df should be equal."
            Expect.floatClose Accuracy.low singleSample.Statistic 0.514 "t statistic should be equal."
        )
    ]


[<Tests>]
let uTestTests =
    // example data taken from https://de.wikipedia.org/wiki/Wilcoxon-Mann-Whitney-Test#Beispiel
    let pays  = [0;400;500;550;600;650;750;800;900;950;1000;1100;1200;1500;1600;1800;1900;2000;2200;3500 ]
    let sexes = ["M";"W";"M";"W";"M";"W";"M";"M";"W";"W";"M";"M";"W";"M";"W";"M";"M";"M";"M";"M"]

    // (sex, pay) pairs ordered by sex
    let paysBySex =
        List.zip sexes pays
        |> List.sortBy fst

    // all pays that carry the given sex label
    let paysOf label =
        paysBySex
        |> List.filter (fun (sex,_) -> sex = label)
        |> List.map snd

    let observed = UTest.computeUtest (paysOf "W") (paysOf "M")

    let expected : TestStatistics.UTestTestStatistics = {
        Statistic       = -1.15
        PValueTwoTailed = 0.2505
        PValueLeft      = 0.875
        PValueRight     = 0.1253
    }

    testList "Testing.UTest" [
        testCase "TwoSample" (fun () ->
            Expect.floatClose Accuracy.low observed.PValueLeft expected.PValueLeft "left p-value should be equal"
            Expect.floatClose Accuracy.low observed.PValueRight expected.PValueRight "right p-value should be equal"
            Expect.floatClose Accuracy.low observed.PValueTwoTailed expected.PValueTwoTailed "p-value should be equal"
            Expect.floatClose Accuracy.low observed.Statistic expected.Statistic "test statistic should be equal"
        )
    ]


[<Tests>]
let chiSquaredTests =
    // ChiSquared https://www.graphpad.com/quickcalcs/chisquared2/
    // example from R
    // obs <- c(315, 101, 108, 32)
    // exp <- c(0.5625, 0.1875, 0.1875, 0.0625)
    // chisq.test(obs, p = exp)
    let testCase1 =
        let expected = [312.75;104.25;104.25;34.75]
        let observed = [315.;101.;108.;32.]
        // degrees of freedom: number of categories minus one
        let df = expected.Length - 1
        ChiSquareTest.compute df expected observed

    //obs <- c(315, 101, 80, 32, 50)
    //exp <- c(0.5625, 0.1875, 0.0875, 0.0625,0.1)
    //chisq.test(obs, p = exp)
    let testCase2 =
        let expected = [325.125;108.375;50.575;36.125;57.8]
        let observed = [315.;101.;80.;32.;50.]
        let df = expected.Length - 1
        ChiSquareTest.compute df expected observed

    testList "Testing.ChiSquaredTest" [
        testCase "compute" <| fun () ->
            // Expect.equal (instead of Expect.isTrue on a comparison) reports the actual and the
            // expected value when the assertion fails; the pass/fail condition is unchanged.
            Expect.equal (Math.Round(testCase1.PValueRight,4)) 0.9254 "pValue should be equal."
            Expect.equal (Math.Round(testCase1.Statistic,4)) 0.4700 "statistic should be equal."
            Expect.equal (Math.Round(testCase2.PValueRight,6)) 0.000638 "pValue should be equal."
            Expect.equal (Math.Round(testCase2.Statistic,3)) 19.461 "statistic should be equal."

    ]

[<Tests>]
let pearsonTests =
    // examples from R
    // cor.test(x,y)
    let testCase1 =
        let seq1 = [44.4; 45.9; 41.9; 53.3; 44.7; 44.1; 50.7; 45.2; 60.1;]
        let seq2 = [ 2.6;  3.1;  2.5;  5.0;  3.6;  4.0;  5.2;  2.8;  3.8;]
        Correlation.testPearson seq1 seq2

    let testCase2 =
        let seq1 = [312.7; 104.2; 104.; 34.7]
        let seq2 = [315.5; 101.3; 108.; 32.2]
        Correlation.testPearson seq1 seq2

    testList "Testing.Correlation" [
        testCase "testPearson" <| fun () ->
            // Expect.equal (instead of Expect.isTrue on a comparison) reports the actual and the
            // expected value when the assertion fails; the pass/fail condition is unchanged.
            Expect.equal (Math.Round(testCase1.PValue,9)) 0.108173054 "pValue should be equal"
            Expect.equal (Math.Round(testCase2.PValue,9)) 0.000294627 "pValue should be equal"
    ]


[<Tests>]
let benjaminiHochbergTests =

    // input p values, with and without their ids
    let largeSetWithIds = readCsv @"benjaminiHochberg_Input.csv"
    let largeSet        = largeSetWithIds |> Array.map snd

    // adjusted p values of the reference implementation (R, according to the resource name)
    let largeSetWithIds_Expected = readCsv @"benjaminiHochberg_AdjustedWithR.csv"
    let largeSet_Expected        = largeSetWithIds_Expected |> Array.map snd

    // all comparisons are carried out on values rounded to 9 decimal places
    let round9 (x: float) = Math.Round(x,9)
    let roundSnd (key: string, value: float) = key, round9 value

    testList "Testing.MultipleTesting.BenjaminiHochberg" [

        testCase "testBHLarge" <| fun () ->
            let adjusted =
                largeSet
                |> MultipleTesting.benjaminiHochbergFDR
                |> Seq.map round9
            let reference = largeSet_Expected |> Seq.map round9
            Expect.sequenceEqual adjusted reference "adjusted pValues should be equal to the reference implementation."

        testCase "testBHLargeNaN" <| fun () ->
            // two leading nan entries are fed in and their results dropped again before comparing
            let adjusted =
                nan :: nan :: List.ofArray largeSet
                |> MultipleTesting.benjaminiHochbergFDR
                |> Seq.skip 2
                |> Seq.map round9
            let reference = largeSet_Expected |> Seq.map round9
            Expect.sequenceEqual adjusted reference "adjusted pValues should be equal to the reference implementation, ignoring nan."

        testCase "testBHLargeBy" <| fun () ->
            let adjusted =
                largeSetWithIds
                |> MultipleTesting.benjaminiHochbergFDRBy id
                |> Seq.sortBy fst
                |> Seq.map roundSnd
            let reference =
                largeSetWithIds_Expected
                |> Seq.sortBy fst
                |> Seq.map roundSnd
            Expect.sequenceEqual adjusted reference "adjusted pValues with keys should be equal to the reference implementation."

        testCase "testBHLargeNaNBy" <| fun () ->
            // the first two entries after sorting by key are skipped
            // (presumably the two "A0" nan entries sort first -- depends on the ids in the csv)
            let adjusted =
                ("A0",nan) :: ("A0",nan) :: List.ofArray largeSetWithIds
                |> MultipleTesting.benjaminiHochbergFDRBy id
                |> Seq.sortBy fst
                |> Seq.skip 2
                |> Seq.map roundSnd
            let reference =
                largeSetWithIds_Expected
                |> Seq.sortBy fst
                |> Seq.map roundSnd
            Expect.sequenceEqual adjusted reference "adjusted pValues with keys should be equal to the reference implementation, ignoring nan."

    ]


[<Tests>]
let qValuesTest =

    // input p values (same resource as used for the Benjamini-Hochberg tests)
    let largeSetWithIds = readCsv @"benjaminiHochberg_Input.csv"
    let largeSet        = largeSetWithIds |> Array.map snd

    // reference q values
    let largeSetWithIds_Expected = readCsv @"qvaluesWithR.csv"
    let largeSet_Expected        = largeSetWithIds_Expected |> Array.map snd

    // reference q values of the robust variant
    let largeSetWithIds_ExpectedRobust = readCsv @"qvaluesRobustWithR.csv"
    let largeSet_ExpectedRobust        = largeSetWithIds_ExpectedRobust |> Array.map snd

    // all comparisons are carried out on values rounded to 9 decimal places
    let round9 (x: float) = Math.Round(x,9)

    testList "Testing.MultipleTesting.Qvalues" [

        testCase "ofPValues" <| fun () ->
            //tested against r qvalue package 2.26.0
            //pi0 estimation is in closed form in r package and therefore cannot be tested
            //qvalue::qvalue(pvals,pi0=0.48345)
            let pi0 = 0.48345
            let computed =
                largeSet
                |> MultipleTesting.Qvalues.ofPValues pi0
                |> Seq.map round9
            let reference = largeSet_Expected |> Seq.map round9
            Expect.sequenceEqual computed reference "qValues should be equal to the reference implementation."

        testCase "ofPValuesRobust" <| fun () ->
            //tested against r qvalue package 2.26.0
            //pi0 estimation is in closed form in r package and therefore cannot be tested
            //qvalue::qvalue(pvals,pi0=0.48345,pfdr=TRUE)
            let pi0 = 0.48345
            let computed =
                largeSet
                |> MultipleTesting.Qvalues.ofPValuesRobust pi0
                |> Seq.map round9
            let reference = largeSet_ExpectedRobust |> Seq.map round9
            Expect.sequenceEqual computed reference "qValues Robust should be equal to the reference implementation."

    ]