-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSVM_MnistPrediction_v3.R
More file actions
125 lines (99 loc) · 4.3 KB
/
SVM_MnistPrediction_v3.R
File metadata and controls
125 lines (99 loc) · 4.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# Loading necessary libraries
library(kernlab)
library(readr)
library(caret)
library(e1071)
# ---- Load data ----
# Reads the MNIST train/test CSVs from the working directory.
# NOTE(review): the label column ends up named after a data value (e.g. `X5`),
# which suggests the files have no header row and read.csv() is consuming the
# first record as column names. Confirm, and consider `header = FALSE`.
Data <- read.csv("mnist_train.csv")
test <- read.csv("mnist_test.csv")

# The two files come back with different column names; align test to train so
# that models fitted on one can be applied to the other.
stopifnot(ncol(test) == ncol(Data))
colnames(test) <- colnames(Data)

# The digit label is the first column. Address it by position-derived name
# rather than the hard-coded, data-dependent name `X5`.
label_col <- colnames(Data)[1]
digit_levels <- 0:9

# Quick sanity checks: distribution summary and NA count per column.
summary(Data)
colSums(is.na(Data))

# ---- Sub-sample ----
# Draw 90% of the row indices at random and DROP them: only the remaining 10%
# of the training file is kept for model fitting / cross-validation, to keep
# run time manageable.
set.seed(1)
indices <- sample(seq_len(nrow(Data)), floor(0.9 * nrow(Data)))
validationdata <- Data[-indices, ]

# ---- Scale training pixels ----
# Divide pixel intensities by 255 (label column excluded) and attach the label
# as a factor with a fixed level set so train and test factors always agree.
scaled_vals <- validationdata[, -1] / 255
scaled_vals$letters <- factor(validationdata[[label_col]], levels = digit_levels)

# ---- Scale test pixels ----
# Same transformation for the test set. The raw label is deliberately NOT
# copied back as a numeric column: the training frame has no such column, and
# keeping it would put the answer next to the predictors.
test_scaled <- test[, -1] / 255
test_scaled$letters <- factor(test[[label_col]], levels = digit_levels)

# ---- Further reduce training size ----
# Keep at most the first 1000 rows to shorten model-fitting time. head() avoids
# the NA-filled rows that `[1:1000, ]` would create if fewer rows were present.
compresseddata <- head(scaled_vals, 1000)
# ---- Linear-kernel SVM ----
# Tunes the cost parameter C of a linear SVM with 5-fold cross-validation on
# `compresseddata`, then evaluates the selected model on `test_scaled`.

# Resampling scheme: method = cross-validation, number = number of folds.
# Stored under its own name so the caret function trainControl() is not
# shadowed by a variable of the same name.
train_control_linear <- trainControl(method = "cv", number = 5)
metric <- "Accuracy"

# Seed fixed so the fold assignment is reproducible.
set.seed(100)

# Grid of candidate C values: 1, 3, 5, 7, 9.
grid <- expand.grid(C = seq(1, 10, by = 2))

# Performing 5-fold cross validation
fit.svm <- train(
  letters ~ .,
  data = compresseddata,
  method = "svmLinear",
  metric = metric,
  tuneGrid = grid,
  trControl = train_control_linear
)
print(fit.svm$results)
# Recorded output:
#  C  Accuracy     Kappa AccuracySD    KappaSD
#1 1 0.8789711 0.8653214 0.02788966 0.03106136
#2 3 0.8789711 0.8653214 0.02788966 0.03106136
#3 5 0.8789711 0.8653214 0.02788966 0.03106136
#4 7 0.8789711 0.8653214 0.02788966 0.03106136
#5 9 0.8789711 0.8653214 0.02788966 0.03106136
# Every C in the grid produced the same cross-validated accuracy.
plot(fit.svm)

# Evaluate the tuned model on the held-out test set.
evaluate_linear_test <- predict(fit.svm, test_scaled)
confusionMatrix(evaluate_linear_test, test_scaled$letters)
### Overall Statistics ###
#Accuracy : 0.8896
#95% CI : (0.8833, 0.8957)
#No Information Rate : 0.1135
#P-Value [Acc > NIR] : < 2.2e-16
##############################
####### RBF METHOD ###########
##############################
# Tunes an RBF-kernel SVM (sigma, C) with 5-fold cross-validation on
# `compresseddata`, then evaluates the selected model on `test_scaled`.
#
# Why the sigma grid changed:
#   The earlier grid was sigma = seq(0, 1, by = 0.25).
#   * sigma = 0 turns the RBF kernel exp(-sigma * ||x - y||^2) into the
#     constant 1, so the model cannot separate classes. Recorded CV accuracy
#     for sigma = 0 was 0.1290 with Kappa = 0 for every C.
#   * The remaining values (0.25 .. 1) did barely better. The best recorded
#     CV accuracy was 0.1830 (sigma = 0.25, C >= 3), and sigma = 0.75 and 1.00
#     again gave 0.1290 / Kappa = 0.
#   * Recorded test-set result with that grid: Accuracy 0.1769,
#     95% CI (0.1695, 0.1845), No Information Rate 0.1135.
#   For comparison, the linear kernel on the same data reached roughly 0.88,
#   so the grid, not the data, was the problem.
#   Candidate sigmas are now taken from kernlab::sigest(), which estimates a
#   suitable range from the data itself.
#   NOTE(review): re-run and record the new results below.

trainControl_rbf <- trainControl(method = "cv", number = 5)
metric <- "Accuracy"

# Seed fixed so both the sigest() sub-sampling and the CV folds are
# reproducible (previously no seed was set before this fit).
set.seed(100)

# Feature matrix without the response column. scaled = FALSE because the pixel
# values were already divided by 255 upstream.
rbf_features <- as.matrix(
  compresseddata[, setdiff(names(compresseddata), "letters"), drop = FALSE]
)
sigma_candidates <- unique(as.numeric(
  kernlab::sigest(rbf_features, scaled = FALSE)
))

## Can tune the below hyper parameters for better accuracy
grid_rbf <- expand.grid(sigma = sigma_candidates, C = seq(1, 10, by = 2))

# Performing 5-fold cross validation
fit.svm_rbf <- train(
  letters ~ .,
  data = compresseddata,
  method = "svmRadial",
  metric = metric,
  tuneGrid = grid_rbf,
  trControl = trainControl_rbf
)
print(fit.svm_rbf$results)
plot(fit.svm_rbf)

# Evaluate the tuned model on the held-out test set.
evaluate_rbf_test <- predict(fit.svm_rbf, test_scaled)
confusionMatrix(evaluate_rbf_test, test_scaled$letters)