2020from sklearn .model_selection import train_test_split
2121from sklearn .metrics import confusion_matrix , classification_report
2222from sklearn .preprocessing import MinMaxScaler
23+
2324# module initializations
2425sns .set ()
2526HERE = os .path .abspath (os .path .dirname (__file__ ))
@@ -59,6 +60,7 @@ def prepare_data():
5960 - split dependent / independent variables
6061 - split training / test data sets
6162 """
63+ print ("Preparing data sets" )
6264 original_db = pd .read_csv (os .path .join (HERE , "data" , "reservations-db.csv" ))
6365
6466 # need to be careful to only work with a **COPY** of the original
@@ -86,71 +88,88 @@ def prepare_data():
8688 return train_test_split (x , y , test_size = 0.30 , stratify = y , random_state = 1 )
8789
8890
89- def linear_kernal ():
91+ def linear_Kernel ():
9092 """
9193 - create training and test data sets
9294 - create a Support Vector Machine (SVC) model with a linear kernel
9395 - train the model
9496 - generate confusion matrix and f-score for the training set
9597 - generate confusion matrix and f-score for the test set
9698 """
99+ print ("Linear Kernel" )
97100 x_train , x_test , y_train , y_test = prepare_data ()
98101
99- scaling = MinMaxScaler (feature_range = (- 1 ,1 )).fit (x_train )
102+ print ("- scaling" )
103+ scaling = MinMaxScaler (feature_range = (- 1 , 1 )).fit (x_train )
100104 x_train_scaled = scaling .transform (x_train )
101105 x_test_scaled = scaling .transform (x_test )
102106
103- # Linear kernal or linear decision boundary
104- svm = SVC (kernel = 'linear' , probability = True )
105- model = svm .fit (X = x_train_scaled , y = y_train )
107+ # Linear Kernel or linear decision boundary
108+ print ("- training" )
109+ svm = SVC (kernel = "linear" , probability = True )
110+ model = svm .fit (X = x_train_scaled , y = y_train )
106111
112+ print ("- modeling on training data" )
107113 y_pred_train_svm = model .predict (x_train_scaled )
108114 metrics_score (y_train , y_pred_train_svm )
109115
116+ print ("- modeling on test data" )
110117 y_pred_test_svm = model .predict (x_test_scaled )
111118 metrics_score (y_test , y_pred_test_svm )
112119
113120 # Set the optimal threshold (refer to the Jupyter Notebook to see how we arrived at 0.40)
114- optimal_threshold_svm = 0.40
121+ optimal_threshold_svm = 0.40
115122
123+ print ("- remodeling on training data" )
116124 y_pred_train_svm = model .predict_proba (x_train_scaled )
117- metrics_score (y_train , y_pred_train_svm [:,1 ] > optimal_threshold_svm )
125+ metrics_score (y_train , y_pred_train_svm [:, 1 ] > optimal_threshold_svm )
118126
127+ print ("- remodeling on test data" )
119128 y_pred_test = model .predict_proba (x_test_scaled )
120- metrics_score (y_test , y_pred_test [:,1 ]> optimal_threshold_svm )
129+ metrics_score (y_test , y_pred_test [:, 1 ] > optimal_threshold_svm )
130+
121131
122- def rbf_kernal ():
132+ def rbf_Kernel ():
123133 """
124134 - create training and test data sets
125135 - create a Support Vector Machine (SVC) model with an RBF kernel
126136 - train the model
127137 - generate confusion matrix and f-score for the training set
128138 - generate confusion matrix and f-score for the test set
129139 """
140+ print ("RBF Kernel" )
130141 x_train , x_test , y_train , y_test = prepare_data ()
131142
132- scaling = MinMaxScaler (feature_range = (- 1 ,1 )).fit (x_train )
143+ print ("- scaling" )
144+ scaling = MinMaxScaler (feature_range = (- 1 , 1 )).fit (x_train )
133145 x_train_scaled = scaling .transform (x_train )
134146 x_test_scaled = scaling .transform (x_test )
135147
136- # Linear kernal or linear decision boundary
137- svm_rbf = SVC (kernel = 'rbf' ,probability = True )
138- model = svm_rbf .fit (x_train_scaled ,y_train )
148+ # RBF kernel (non-linear decision boundary)
149+ print ("- training" )
150+ svm_rbf = SVC (kernel = "rbf" , probability = True )
151+ model = svm_rbf .fit (x_train_scaled , y_train )
139152
153+ print ("- modeling on training data" )
140154 y_pred_train_svm = model .predict (x_train_scaled )
141155 metrics_score (y_train , y_pred_train_svm )
142156
157+ print ("- modeling on test data" )
143158 y_pred_test_svm = model .predict (x_test_scaled )
144159 metrics_score (y_test , y_pred_test_svm )
145160
146161 # Set the optimal threshold (refer to the Jupyter Notebook to see how we arrived at 0.41)
147- optimal_threshold_svm = 0.41
162+ optimal_threshold_svm = 0.41
148163
164+ print ("- remodeling on training data" )
149165 y_pred_train_svm = model .predict_proba (x_train_scaled )
150- metrics_score (y_train , y_pred_train_svm [:,1 ] > optimal_threshold_svm )
166+ metrics_score (y_train , y_pred_train_svm [:, 1 ] > optimal_threshold_svm )
151167
168+ print ("- remodeling on test data" )
152169 y_pred_test = model .predict_proba (x_test_scaled )
153- metrics_score (y_test , y_pred_test [:,1 ]> optimal_threshold_svm )
170+ metrics_score (y_test , y_pred_test [:, 1 ] > optimal_threshold_svm )
171+
154172
155173if __name__ == "__main__" :
156- linear_kernal ()
174+ linear_Kernel ()
175+ rbf_Kernel ()
0 commit comments