# HW4-ch9q7.py (74 lines, 59 loc, 2.99 KB)
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Seed NumPy's global RNG so the population and every sample drawn from it
# are reproducible from run to run.
np.random.seed(42)

# Use the seaborn look for all figures. 'seaborn-v0_8' is the name used since
# Matplotlib 3.6; older releases only ship the style as 'seaborn'. If neither
# is installed the default style is kept rather than aborting the script.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
# --- Simulation: empirical vs. analytical standard error of the mean (SEM) ---

# Synthetic, finite population that all samples are drawn from.
population_mean = 50
population_std = 10
population_size = 10000
population = np.random.normal(population_mean, population_std, population_size)

# 25 log-spaced sample sizes from 10 up to 10% of the population size.
# Round to the nearest integer *before* casting: a bare astype(int) truncates
# toward zero, so a grid point that comes out a hair below an integer because
# of floating-point error (e.g. 999.9999999 for the 1000 endpoint) would
# silently lose one.
sample_sizes = np.rint(
    np.logspace(np.log10(10), np.log10(population_size * 0.1), 25)
).astype(int)

# Textbook SEM, sigma / sqrt(n), using the nominal population SD.
# NOTE(review): the samples below are drawn without replacement from a finite
# population, for which the exact SEM is smaller by sqrt((N - n) / (N - 1))
# (about 5% at n = 1000, N = 10000). Confirm whether ignoring that correction
# is intended for this exercise.
analytical_sem = population_std / np.sqrt(sample_sizes)

num_repeats = 50

# Row i holds the results of the num_repeats samples of size sample_sizes[i].
sem_estimates_method1 = np.zeros((len(sample_sizes), num_repeats))
sample_means_storage = np.zeros((len(sample_sizes), num_repeats))

for i, n in enumerate(sample_sizes):
    for j in range(num_repeats):
        sample = np.random.choice(population, size=n, replace=False)
        # Method 1: SEM estimated from one sample as s / sqrt(n).
        sem_estimates_method1[i, j] = np.std(sample, ddof=1) / np.sqrt(n)
        # Keep the sample mean for Method 2.
        sample_means_storage[i, j] = np.mean(sample)

# Method 2: SEM estimated as the SD of the repeated sample means.
sem_estimates_method2 = np.std(sample_means_storage, axis=1, ddof=1)

# Average and spread of the Method 1 estimates across repeats. ddof=1 matches
# every other standard deviation computed in this script.
mean_sem_method1 = np.mean(sem_estimates_method1, axis=1)
std_sem_method1 = np.std(sem_estimates_method1, axis=1, ddof=1)
# Figure 1: two stacked panels that share the sample-size axis.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 10), sharex=True)

# Panel A -- the analytical SEM together with the two empirical estimates.
ax1.plot(sample_sizes, analytical_sem, 'k-', linewidth=2, label='Analytical SEM')
for _sem_values, _line_format, _legend_text in (
    (mean_sem_method1, 'bo--', 'Method 1: Sample SD/√N'),
    (sem_estimates_method2, 'rs--', 'Method 2: SD of sample means'),
):
    ax1.plot(sample_sizes, _sem_values, _line_format, label=_legend_text, markersize=5)
ax1.set(
    ylabel='Standard Error of the Mean',
    title='A) Comparison of SEM Estimation Methods',
)
ax1.legend()
ax1.grid(True)

# Panel B -- signed deviation of each estimate from the analytical SEM, in percent.
percent_error_method1 = 100 * (mean_sem_method1 - analytical_sem) / analytical_sem
percent_error_method2 = 100 * (sem_estimates_method2 - analytical_sem) / analytical_sem
for _error_values, _line_format, _legend_text in (
    (percent_error_method1, 'bo--', 'Method 1 Error'),
    (percent_error_method2, 'rs--', 'Method 2 Error'),
):
    ax2.plot(sample_sizes, _error_values, _line_format, label=_legend_text, markersize=5)
# Zero line marks perfect agreement with the analytical value.
ax2.axhline(y=0, color='k', linestyle='--', alpha=0.5)
ax2.set_xscale('log')
ax2.set(
    xlabel='Sample Size (log scale)',
    ylabel='Percentage Error (%)',
    title='B) Percentage Error Relative to Analytical SEM',
)
ax2.legend()
ax2.grid(True)

plt.tight_layout()
plt.show()
# Fixed closing remarks written to stdout; the first entry starts with a
# newline so the heading is preceded by a blank line.
_summary_lines = (
    "\nSummary of Results:",
    "1. As expected, the Standard Error of the Mean (SEM) decreases with increasing sample size.",
    "2. For small sample sizes (under ~100), there is more variation between estimation methods.",
    "3. Both empirical methods converge to the analytical SEM as sample size increases.",
    "4. The second method (SD of sample means) provides a more direct estimation and is less variable.",
)
print(*_summary_lines, sep="\n")
# Figure 2: spread of the individual sample means at every 5th sample size.
plt.figure(figsize=(10, 6))

# Step through the ORIGINAL row indices (0, 5, 10, ...). Enumerating the
# strided view sample_sizes[::5] would yield i = 0, 1, 2, ... and pair each
# label N=n with the sample means of a different (smaller) sample size.
for i in range(0, len(sample_sizes), 5):
    n = sample_sizes[i]
    # One column of points per sample size: x is n repeated, y the stored means.
    plt.scatter([n] * num_repeats, sample_means_storage[i], alpha=0.5, label=f'N={n}')

plt.xscale('log')
plt.xlabel('Sample Size (log scale)')
plt.ylabel('Sample Means')
plt.title('Variability of Sample Means Across Different Sample Sizes')
# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True)
plt.tight_layout()
plt.show()