-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmake_cond_num_data.py
More file actions
152 lines (123 loc) · 5.27 KB
/
make_cond_num_data.py
File metadata and controls
152 lines (123 loc) · 5.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import numpy as np
import pandas as pd
import signac
import signac
import json
from bo_methods_lib.bo_methods_lib.analyze_data import General_Analysis, open_file_helper
from bo_methods_lib.bo_methods_lib.GPBO_Classes_plotters import Plotters
#Ignore warnings
import warnings
warnings.simplefilter("ignore", category=RuntimeWarning)
warnings.simplefilter("ignore", category=UserWarning)
warnings.simplefilter("ignore", category=DeprecationWarning)
#Set Stuff
meth_name_val_list = [1,2,3,4,5,6,7]
save_csv = False #Set to False if you don't want to save/resave csvs
save_figs = False
modes = ["act"]
project = signac.get_project("GPBO_nonoise")
# Initialize dictionaries to store the condition numbers and statistics
dict_k = {} # Stores raw condition numbers for each case study and BO method
dict_stats = {} # Stores the statistics (log averages, min, max, median)
# Iterate through the different case study values
for val in [11, 17, 2, 3, 15, 14, 12, 13, 10, 1]:
criteria_dict = {
"cs_name_val": val,
"ep_enum_val": 1,
"gp_package": "gpflow",
"meth_name_val": {"$in": meth_name_val_list}
}
# Iterate through each mode
for mode in modes:
analyzer = General_Analysis(criteria_dict, project, mode, save_csv)
plotters = Plotters(analyzer, save_figs)
# Get all data from experiments
df_all_jobs, job_list, theta_true_data = analyzer.get_df_all_jobs(save_csv=save_csv)
# Get best data from ep experiment
df_best, job_list_best = analyzer.get_best_data()
# Load the best GP from each method
for i in range(len(job_list_best)):
# Load job
job = job_list_best[i]
cs_name = df_best["CS Name"].iloc[i]
bo_method = df_best["BO Method"].iloc[i]
run_num = df_best["Run Number"].iloc[i]
bo_iter = df_best["BO Iter"].iloc[i]
# Open BO_Results_GPs.gz
loaded_results = open_file_helper(job.fn("BO_Results_GPs.gz"))
with open(job.fn("signac_statepoint.json"), 'r') as json_file:
# Load the JSON data
sp_data = json.load(json_file)
run_num -= sp_data["bo_run_num"]
bo_iter -= 1
# Get the GP emulator
try:
gp_emulator = loaded_results[run_num].list_gp_emulator_class[bo_iter]
except:
print(len(loaded_results), run_num, bo_iter)
print(len(loaded_results[run_num].list_gp_emulator_class))
# Compute the condition number of the kernel matrix
k = np.linalg.cond(gp_emulator.fit_gp_model.kernel(gp_emulator.feature_train_data))
# Store the condition number under both case study and BO Method
case_study_key = f"{cs_name}"
method_key = f"{bo_method}"
if (case_study_key, method_key) in dict_k:
dict_k[(case_study_key, method_key)].append(np.log10(k))
else:
dict_k[(case_study_key, method_key)] = [np.log10(k)]
# Initialize a dictionary to store the condition numbers aggregated by BO Method
from collections import defaultdict
bo_method_dict = defaultdict(list) # This will store condition numbers for each BO Method
# Aggregate condition numbers across all case studies for each BO Method
for (case_study, bo_method), values in dict_k.items():
bo_method_dict[bo_method].extend(values) # Collect all condition numbers for each BO Method
# Prepare the data for DataFrame and for dict_stats
data = {
'BO Method': [],
'Log10 Avg a': [],
'Log10 Min a': [],
'Log10 Max a': [],
'Log10 Median a': []
}
# Process the aggregated condition numbers and compute stats for each BO Method
for bo_method, values in bo_method_dict.items():
log_avg = np.mean(values) # Compute the average
log_min = np.min(values) # Compute the minimum
log_max = np.max(values) # Compute the maximum
log_median = np.median(values) # Compute the median
# Save the statistics in dict_stats (with just the BO Method key)
dict_stats[bo_method] = {
'Log10 Avg a': log_avg,
'Log10 Min a': log_min,
'Log10 Max a': log_max,
'Log10 Median a': log_median
}
# Add the aggregated data to the DataFrame
data['BO Method'].append(bo_method)
data['Log10 Avg a'].append(log_avg)
data['Log10 Min a'].append(log_min)
data['Log10 Max a'].append(log_max)
data['Log10 Median a'].append(log_median)
# Create a DataFrame from the data
df_results = pd.DataFrame(data)
# Display or save the DataFrame as needed
print(df_results)
# Optional: Save to CSV
df_results.to_csv('gpflow_condition_numbers.csv', index=False)
# Prepare the data for DataFrame conversion from dict_k
data_k = {
'Case Study': [],
'BO Method': [],
'Log10 Condition Number (a)': []
}
# Process the condition numbers and add them to the DataFrame
for (case_study, bo_method), values in dict_k.items():
for value in values:
data_k['Case Study'].append(case_study)
data_k['BO Method'].append(bo_method)
data_k['Log10 Condition Number (a)'].append(value)
# Convert the data into a DataFrame
df_k = pd.DataFrame(data_k)
print(df_k)
# Save the DataFrame as a CSV file
df_k.to_csv('gpflow_condition_numbers_raw.csv', index=False)