-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPLA_3D_visualisation.py
More file actions
134 lines (107 loc) · 4.36 KB
/
PLA_3D_visualisation.py
File metadata and controls
134 lines (107 loc) · 4.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import numpy as np
import matplotlib as mpl
# mpl.use('TkAgg')  # macOS only: works around a backend problem in matplotlib 3.0.3
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D # matplotlib sub-library, only for 3d-visulation
def data_load(dataPath):
    """Load a comma-separated numeric data set into a NumPy array.

    Every non-blank line of the file is split on ',' and each field is
    converted with float(), so one line of text becomes one row of the
    result.

    Args:
        dataPath: Path of the text file to read.

    Returns:
        The parsed rows passed through np.asarray. A file with no data
        lines yields an empty array.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to float.
    """
    rows = []
    # Read the file once inside a context manager so the handle is always
    # closed.  Plain 'r' already gives universal-newline handling; the old
    # 'rU' mode flag is rejected by Python 3.11 and later.
    with open(dataPath, 'r') as dataFile:
        for line in dataFile:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing empty line) carry no sample.
                continue
            rows.append([float(field) for field in line.split(',')])
    return np.asarray(rows)  # list of float lists -> n-dim array
def PLA_train(dataSet, maxIter=20):
    """Train a perceptron with per-sample (stochastic) updates.

    Args:
        dataSet: 2-D array with one sample per row. The last column is
            read as the label and all other columns as features. The
            update rule multiplies by the label, so labels are presumably
            +1 / -1 -- confirm against the data files.
        maxIter: Maximum number of passes over the data.

    Returns:
        Tuple (w, b). w is an array of shape (1, number_of_features),
        initialised to ones; b is the bias, initialised to 0.

    The rows are visited in a random order drawn with np.random.shuffle.
    The shuffle is applied to a copy, so the caller's array keeps its
    original row order.
    """
    n = dataSet.shape[1]  # number of columns: features + 1 label

    # Shuffle a private copy instead of reordering the caller's data.
    samples = dataSet.copy()
    np.random.shuffle(samples)
    features = samples[:, :-1]
    labels = samples[:, -1]

    w = np.ones((1, n - 1))  # weight
    b = 0                    # bias
    eta = 1                  # learning rate

    for _ in range(maxIter):
        updated = False
        for x, y in zip(features, labels):
            # A sample is misclassified when y * (w.x + b) <= 0.  The
            # equality case matters: a point exactly on the boundary has
            # sign 0 at prediction time and therefore matches no label.
            if y * (np.sum(w * x) + b) <= 0:
                w = w + eta * y * x  # w(k+1) = w(k) + eta * y * x
                b = b + eta * y      # b(k+1) = b(k) + eta * y
                updated = True
        if not updated:
            # A full pass without corrections means further passes would
            # leave w and b unchanged, so stop early.
            break
    return w, b
def PLA_test(w, b, testSet):
    """Evaluate a trained perceptron on a labelled test set.

    For every row the prediction is sign(x . w[0] + b), where x is the row
    without its last column and the last column is the true label. Each
    prediction/label pair is printed while the confusion counts are
    accumulated.

    Args:
        w: Weight array; only its first row, w[0], is used.
        b: Bias added to the dot product.
        testSet: 2-D array, one sample per row, label in the last column.

    Returns:
        Tuple (accuracy, FPR, TPR, F1Score). The last three are rounded
        to 2 decimals. A metric whose denominator is zero is reported as
        0.0 and a caution line is printed; the other metrics are still
        computed.
    """
    TP, TN, FP, FN = 0, 0, 0, 0
    for element in testSet:
        a = np.sign(np.dot(element[0:-1], w[0][:]) + b)
        if a == element[-1]:  # correct prediction: TP or TN
            if a == 1:
                TP += 1
            else:
                TN += 1
        else:  # wrong prediction: FP or FN
            if a == 1:
                FP += 1
            else:
                FN += 1
        print('\t pred:fact {}:{}'.format(int(a), int(element[-1])))

    total = TP + TN + FP + FN
    accuracy = _safe_ratio(TP + TN, total)
    precision = _safe_ratio(TP, TP + FP)
    recall = _safe_ratio(TP, TP + FN)
    FPR = _safe_ratio(FP, FP + TN)
    TPR = recall  # same definition: TP / (TP + FN)
    F1Score = _safe_ratio(2 * precision * recall, precision + recall)

    # Each metric is guarded separately so that one undefined ratio (for
    # example F1 when there are no true positives) does not wipe out the
    # ratios that are well defined.
    if 0 in (total, TP + FP, TP + FN, FP + TN, precision + recall):
        print("\nCAUTION: division by zero\n")

    return accuracy, round(FPR, 2), round(TPR, 2), round(F1Score, 2)


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    if denominator == 0:
        return 0.0
    return numerator / denominator
def Visualisation3D(dataSet, w, b, dim1=1, dim2=2, dim3=3):
    """Show the samples and the perceptron decision plane in a 3-D plot.

    Args:
        dataSet: 2-D array, one sample per row, label in the last column.
            Rows labelled -1 are drawn as 'class1' and rows labelled 1 as
            'class2'; rows with any other label are not drawn.
        w: Weight array; only its first row, w[0], is used.
        b: Bias of the perceptron.
        dim1, dim2, dim3: 1-based feature indices mapped to the x, y and
            z axes.

    The plane is obtained by solving
        w[dim1]*x + w[dim2]*y + w[dim3]*z + b = 0
    for z, using the same "w . x + b" convention as PLA_train and
    PLA_test. Weights of features other than the three selected ones do
    not enter the plotted plane. If the dim3 weight is zero the division
    yields inf/nan values instead of a drawable plane.

    The call blocks in plt.show() until the window is closed.
    """
    x1 = dataSet[:, dim1 - 1]  # x-axis in 3D space
    x2 = dataSet[:, dim2 - 1]  # y-axis in 3D space
    # The grid is built directly from the sample coordinates, so it has
    # (number of samples) x (number of samples) points.
    X1, X2 = np.meshgrid(x1, x2)
    # z-axis in 3D space; note the minus sign on b (boundary: w.x + b = 0)
    X3 = (-b - w[0][dim1 - 1] * X1 - w[0][dim2 - 1] * X2) / w[0][dim3 - 1]

    # create 3d figure
    fig = plt.figure(figsize=(8, 6))
    ax1 = fig.add_subplot(1, 1, 1, projection='3d')
    ax1.set_title('Perceptron Result and Hyperplane')
    ax1.set_xlabel('x' + str(dim1))
    ax1.set_ylabel('x' + str(dim2))
    ax1.set_zlabel('x' + str(dim3))

    # One scatter call per real class; there is no placeholder entry with
    # an empty marker/colour, which is not a valid colour specification.
    classStyles = (
        (-1, 'class1', 'o', 'g'),
        (1, 'class2', 'x', 'r'),
    )
    for classValue, label, marker, color in classStyles:
        mask = dataSet[:, -1] == classValue  # samples of this class
        ax1.scatter(dataSet[mask, dim1 - 1],
                    dataSet[mask, dim2 - 1],
                    dataSet[mask, dim3 - 1],
                    marker=marker, color=color, label=label, s=10)

    # Reuse the axes created above: Figure.gca() no longer accepts
    # keyword arguments such as projection= in current Matplotlib.
    ax1.plot_surface(X1, X2, X3, color='grey', alpha=0.002)
    ax1.legend(loc='upper right')
    plt.tight_layout()
    plt.show()
if __name__ == "__main__":
    # Script entry point: load the data, train the perceptron, plot the
    # result and finally report the test metrics.

    # Load the high-dimensional training and test sets.
    trainSet = data_load("data/train.data")
    testSet = data_load("data/test.data")

    # Train the model and show the learned parameters.
    trained = PLA_train(trainSet, maxIter=20)
    w, b = trained
    print('PLA_weight:', np.squeeze(w))
    print('PLA_bias :', b)

    # Visualise features 2, 3 and 4 together with the decision plane.
    # Drag the figure with the mouse to rotate the 3D model.
    # Alternative view for testing: dim1=1, dim2=2, dim3=3.
    Visualisation3D(trainSet, w, b, dim1=2, dim2=3, dim3=4)

    # Evaluate on the test set and print the metrics.
    metrics = PLA_test(w, b, testSet)
    accuracy, FPR, TPR, F1Score = metrics
    percent = accuracy * 100
    print('accruacy: %d%%' % percent)  # shown as a whole-number percentage
    print('FPR: {},TPR: {},F1Score: {}'.format(FPR, TPR, F1Score))