-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathFigure_5_Phoneme_LR.py
More file actions
102 lines (89 loc) · 3.64 KB
/
Figure_5_Phoneme_LR.py
File metadata and controls
102 lines (89 loc) · 3.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
import numpy as np
from numpy import linalg
import matplotlib.pyplot as plt
from time import time
import DataReader as DR
from sklearn.linear_model import LogisticRegression
# Fixed seed passed to LogisticRegression so results are reproducible across runs.
random_state=42
def find_anchors_from_class_0(X,y):
    """Select anchor points from the first class (lower label value) in *y*.

    For every class-0 point, the minimum L1 distance to any class-1 point is
    computed and normalized by its standard deviation.  Class-0 points whose
    normalized minimum distance is at least ``min + 0.43 * std`` above the
    smallest such distance are returned — i.e. the points lying farthest from
    the opposite class.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
    y : ndarray of shape (n_samples,), assumed to contain exactly two labels

    Returns
    -------
    ndarray : the selected anchor rows of ``X`` belonging to class 0.
    """
    y_u = np.unique(y)
    points_of_class0 = X[y == y_u[0]]
    points_of_class1 = X[y == y_u[1]]
    # distances[i, j] = L1 distance from class-1 point i to class-0 point j.
    distances = np.array(
        [linalg.norm(points_of_class0 - a, axis=1, ord=1) for a in points_of_class1]
    )
    # For each class-0 point: distance to its nearest class-1 neighbour,
    # scaled by the std so the 0.43 threshold below is scale-free.
    min_distance = np.min(distances, axis=0)
    min_distance = min_distance / np.std(min_distance)
    # Keep the points whose nearest-opposite-class distance is at least
    # 0.43 (normalized) std units above the smallest observed distance.
    # NOTE: the original additionally tested `min_distance <= np.max(min_distance)`,
    # which is always true elementwise and has been removed as dead code.
    keep = min_distance >= np.min(min_distance) + np.std(min_distance) * 0.43
    return points_of_class0[keep]
def find_anchors_from_class_1(X,y):
    """Select anchor points from the second class (higher label value) in *y*.

    Mirror of ``find_anchors_from_class_0``: for every class-1 point, the
    minimum L1 distance to any class-0 point is computed and normalized by its
    standard deviation.  Class-1 points whose normalized minimum distance is at
    least ``min + 0.7 * std`` above the smallest such distance are returned.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
    y : ndarray of shape (n_samples,), assumed to contain exactly two labels

    Returns
    -------
    ndarray : the selected anchor rows of ``X`` belonging to class 1.
    """
    y_u = np.unique(y)
    points_of_class0 = X[y == y_u[0]]
    points_of_class1 = X[y == y_u[1]]
    # distances[i, j] = L1 distance from class-0 point i to class-1 point j.
    distances = np.array(
        [linalg.norm(points_of_class1 - a, axis=1, ord=1) for a in points_of_class0]
    )
    # For each class-1 point: distance to its nearest class-0 neighbour,
    # scaled by the std so the 0.7 threshold below is scale-free.
    min_distance = np.min(distances, axis=0)
    min_distance = min_distance / np.std(min_distance)
    # Keep the points whose nearest-opposite-class distance is at least
    # 0.7 (normalized) std units above the smallest observed distance.
    # NOTE: the original additionally tested `min_distance <= np.max(min_distance)`,
    # which is always true elementwise and has been removed as dead code.
    keep = min_distance >= np.min(min_distance) + np.std(min_distance) * 0.7
    return points_of_class1[keep]
def map_min_1_2(X,anchors):
    """Append two anchor-distance features as new columns of *X*.

    For every row of ``X`` the minimum L1 distance to each of the two anchor
    sets in ``anchors`` is computed and normalized by its standard deviation.
    Rows that coincide with an anchor (distance exactly zero) get the zero
    replaced by the mean distance over rows that are non-zero in *both*
    features.  Returns ``X`` with the two feature columns stacked on the right.
    """
    def nearest_anchor_distance(anchor_set):
        # Minimum L1 distance from each row of X to the anchor set,
        # scaled to unit standard deviation.
        all_dists = np.array(
            [linalg.norm(X - a, axis=1, ord=1) for a in anchor_set]
        )
        nearest = all_dists.min(axis=0)
        return nearest / np.std(nearest)

    feat1 = nearest_anchor_distance(anchors[0])
    feat2 = nearest_anchor_distance(anchors[1])
    # Rows that are themselves anchors have a distance of exactly zero;
    # substitute the mean over rows non-zero in both features (mask is
    # computed before either replacement, matching the original order).
    both_nonzero = (feat1 != 0) & (feat2 != 0)
    feat1[feat1 == 0] = np.mean(feat1[both_nonzero])
    feat2[feat2 == 0] = np.mean(feat2[both_nonzero])
    n = len(X)
    return np.hstack((X, feat1.reshape(n, 1), feat2.reshape(n, 1)))
# ---- Build anchor-distance features for Phoneme and evaluate LogisticRegression ----
X_train, X_test, y_train, y_test = DR.Phoneme()
y_unique = np.unique(y_train)

# One anchor set per class, extracted from the training split only.
sets_of_anchors = [
    find_anchors_from_class_0(X_train, y_train),
    find_anchors_from_class_1(X_train, y_train),
]

# Augment both splits with the two nearest-anchor distance columns.
XD = map_min_1_2(X_train, sets_of_anchors)
XT = map_min_1_2(X_test, sets_of_anchors)

# Fit the classifier on the augmented training data and time the fit.
start = time()
clf = LogisticRegression(C=1, solver='lbfgs', penalty='l2', dual=False,
                         random_state=random_state).fit(XD, y_train)
train_time = round(time() - start, 4)

score_training = round(100 * accuracy_score(y_train, clf.predict(XD)), 2)
score_test = round(100 * accuracy_score(y_test, clf.predict(XT)), 2)

# Scatter the two appended distance features (last two columns), one
# colour per class, with accuracy/timing annotated on the figure.
for label, colour in zip(y_unique, ('red', 'blue')):
    mask = y_train == label
    plt.scatter(XD[mask][:, -2], XD[mask][:, -1], c=colour, edgecolors='k')
plt.xlabel(r'$||x_i-a_i^{(1)}||$')
plt.ylabel(r'$||x_i-a_i^{(2)}||$')
plt.text(XD[:, -2].max() - 0.5, XD[:, -1].max() - 2,
         "Training Acc. : %.2f\nTest Acc. : %.2f\nTraining T. : %.3f"
         % (score_training, score_test, train_time),
         size=10, va="baseline", ha="right", multialignment="left",
         bbox=dict(fc="none"))
plt.title('Phoneme: Logistic_Reg_LBFGS_L2')
plt.show()