-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathStatTools.py
More file actions
179 lines (147 loc) · 7.77 KB
/
StatTools.py
File metadata and controls
179 lines (147 loc) · 7.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 3 10:38:04 2019
@author: Shyam Bhuller
@Desciption: Contains all code involved in analysis to do with Sensitivity studies
"""
import numpy as np
import winsound
import matplotlib.pyplot as plt
from scipy import stats
"""Will generate a Gaussian distribution for a value and its uncertianty"""
def Generate_MC(value, error, n):
return np.random.normal(value, error , int(n))
"""Calculate A_T by randmoly sampling N(C_T > 0) and N(C_T < 0) and generating a distribution of A_T from this"""
def MC_Sim_A_T(lower, upper, n):
means = [lower, upper]
dist_l = Generate_MC(means[0], np.sqrt(lower) , n) # get n random samples in Gaussain distribution
dist_u = Generate_MC(means[1], np.sqrt(upper), n)
dist = (dist_u - dist_l) / (dist_u + dist_l) # calculate A_T distribution
A_T = np.mean(dist) # mean value of A_T
error = np.std(dist) # error of A_T
return dist, A_T, error
"""Check if a particular dataset can be modelled as a Gaussain"""
def NormaliseCheck(data, confidence):
_, p = stats.normaltest(data) # generate p value of normal distribution for the data
# if p > confidence distribution isn't gaussian
if(len(np.argwhere(p > confidence)) > 0):
print('one of the diestibutions isnt gaussian.')
else:
print('can be modelled as as gaussian for condifence:', confidence)
return p
"""Calcualte P asymmetry byt randomly sampling data from a distribution"""
def MC_A_T(C_T, n):
lower = [] # C_T < 0
upper = [] # C_T > 0
"""Get the data for the C_T conditions"""
for i in range(len(C_T)):
if C_T[i] < 0:
lower.append(C_T[i])
else:
upper.append(C_T[i])
dist, A_T, error = MC_Sim_A_T(len(lower), len(upper), n) # calculate A_T by random sampling
return dist, A_T, error
"""Estimate minimum amount of asymmetry in order to see P violation"""
def MC_Sim_BestCase(min_, max_, iter_, samples, confidence=1, n=int(1E7)):
diff = np.linspace(min_, max_, iter_) # create sets of asymmetry values to try
lower = samples/2 - diff # get C_T < 0 for the various A_Ts
upper = samples - lower # get C_T > 0
print('mean | error')
"""Goes through each A_T and checks if A_T shows significant asymmetry"""
for i in range(len(diff)):
dist, A_T, error = MC_Sim_A_T(lower[i], upper[i], n) # calculate distributions of A_T by random sampling
print(A_T, '|' , error)
"""if the uncertainty up to a confidence is less than the mean value, we have found P violation"""
if((confidence * error) < abs(A_T)):
break
"""Plot distriubution for visual example of the asymmetry needed for the given sample size"""
if len(dist) > 0:
plt.hist(dist, 50, label='$A_{Tmin}$', density=True)
print('difference needed in', samples, 'samples is at least:', diff[i])
"""will sample through generated data to get a rough order of magnitude of the sample number needed to see P violation for a given amount"""
def MC_Min_A_T(min_, max_, iter_, A_T, confidence=1, n=int(1E7)):
samples = np.linspace(min_, max_, iter_) # get a range of sample numbers to try out
lower = 0.5 * samples * (1 - A_T) # get C_T < 0
upper = 0.5 * samples * (1+ A_T) # get C_T > 0
print('mean | error')
"""Goes through each sample size and checks if A_T shows significant asymmetry"""
for i in range(len(samples)):
dist, mean, error = MC_Sim_A_T(lower[i], upper[i], n)
print(mean, '|' , error)
if((confidence * error) < abs(mean)):
break
return i, mean, samples[i], dist
"""Search algorithm for MC_Min, it will search through a range of samples until either the maximum search limit
is reached, or significant asymmetry is found. Does so by adjusting the range of values to try."""
def Search_A_T(_min, _max, _iter, A_CT, confidence=1, n=int(1E7), maxSearch=10):
print('working...')
win = False # we havn't found asymmetry yet
search = 1 # this is the first search we are trying
"""While no asymmetry has been found, keep looking"""
while win == False:
"""Dont want to run the code forever"""
if(search == maxSearch):
print('maximum search limit reached, convergence failed or increase maxSearch.')
break
score, A_CT_sim, samplesRequired, dist = MC_Min_A_T(_min, _max, _iter, A_CT, confidence, n) # calls MC_min and gets the results, see function for more detail
score += 1 # add the score
"""if all the samples we gave showed no asymmetry, we need to push the upper bound"""
if score == _iter:
win == False
print('upper bound needs to be pushed up')
_min = _max # set minimum sample to the previous maximum
_max *= 10 # det the maximum sample to 10x the previous
search += 1 # one search has been completed
"""if the score is less than the number of samples we tried, it means we found some asymmetry so we can stop"""
if score < _iter:
win = True
print('done!')
winsound.MessageBeep() # notify me when its done
weights = np.ones_like(dist)/float(len(dist)) # calculate weights such that bins of the distribution are equal to that of dist
plt.hist(dist, 50, label='$A_{Tideal}$', weights=weights) # plot the ideal value of A_T to find asymmetry
print('events needed to see asymmetry', A_CT_sim, 'with significance is', samplesRequired)
"""will sample through generated data to get a rough order of magnitude of the sample number needed to see CP violation for a given amount"""
def MC_Min_CP(min_, max_, iter_, A_T, A_Tbar, confidence=1, n=int(1E7)):
samples = np.linspace(min_, max_, iter_) # get range of samples to try
l = 0.5 * samples * (1 - A_T[0]) # get C_T < 0
u = 0.5 * samples * (1+ A_T[0]) # get C_T > 0
lbar = 0.5 * samples * (1 - A_Tbar[0]) # get -C_Tbar < 0
ubar = 0.5 * samples * (1+ A_Tbar[0]) # # get -C_Tbar > 0
print('mean | error')
"""Generates samples of P asymmretries and uses them to calculate the A_CP distribution for each sample"""
for i in range(len(samples)):
dist, _, _ = MC_Sim_A_T(l[i], u[i], n) # get A_T distribution
distbar, _, _ = MC_Sim_A_T(lbar[i], ubar[i], n) # get A_Tbar distribution
A_CP = 0.5 * (dist - distbar) # calcualte A_CP distribution
mean = np.mean(A_CP) # get the mean value
error = np.std(A_CP) # get the uncertainty
print(mean, '|' , error)
"""Check if A_CP asymmetry is significant"""
if((confidence * error) < abs(mean)):
break
return i, mean, samples[i], dist
"""Search algorithm for MC_Min_CP, almost identical to Search_A_T. See Search A_T for details"""
def Search_CP(_min, _max, _iter, A_T, A_Tbar, confidence=1, n=int(1E7), maxSearch=10):
print('working...')
win = False
search = 1
while win == False:
if(search == maxSearch):
print('maximum search limit reached, convergence failed or increase maxSearch.')
break
score, A_CP, samplesRequired, dist = MC_Min_CP(_min, _max, _iter, A_T, A_Tbar, confidence, n)
score += 1
if score == _iter:
win == False
print('upper bound needs to be pushed up')
_min = _max
_max *= 10
search += 1
if score < _iter:
win = True
print('done!')
winsound.MessageBeep()
weights = np.ones_like(dist)/float(len(dist))
plt.hist(dist, 50, weights=weights)
print('events needed to see asymmetry', A_CP, 'with significance is', samplesRequired)
return samplesRequired