# Handwritten_Digit_Classifier/Digit_Recognition_CNN.py (feeney92/Handwritten_Digit_Classifier, branch main)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import ReduceLROnPlateau
from scipy.ndimage.filters import gaussian_filter

# Fetch MNIST: image arrays plus integer digit labels for the train and test splits
(x_train, labels_train), (x_test, labels_test) = mnist.load_data()

# In one step per split: cast the pixels to float32, scale them into [0, 1]
# and append the single channel axis the CNN expects (N, 28, 28, 1)
x_train = (x_train.astype('float32') / 255).reshape(len(x_train), 28, 28, 1)
x_test = (x_test.astype('float32') / 255).reshape(len(x_test), 28, 28, 1)

# One-hot encode the integer labels into 10-class target vectors
y_train, y_test = (to_categorical(labels, 10) for labels in (labels_train, labels_test))


# Define a function which adds blur to the images (for use in the data augmentation function below)
def blur(img):
    """Randomly apply a Gaussian blur to a single image.

    One uniform random number in [0, 1) decides the outcome:
      * below 0.1            -> blur with sigma 1
      * from 0.1 up to 0.125 -> blur with sigma 1.5
      * otherwise            -> the image is returned unchanged

    For a rank-3 image the blur is applied to the first two axes only, so
    the values of different channels are never mixed together. A scalar
    sigma would also filter along the last axis; that is harmless for the
    single-channel images used in this file but wrong for multi-channel
    input. Any other rank is blurred along every axis.

    Args:
        img: image array; rank 3 is treated as (height, width, channels).

    Returns:
        An array with the same shape as ``img`` (``img`` itself when no
        blur is applied).
    """
    rdm = np.random.uniform(0, 1)
    if rdm < 0.1:
        sigma = 1
    elif rdm < 0.125:
        sigma = 1.5
    else:
        return img

    if np.ndim(img) == 3:
        # A sigma of 0 makes gaussian_filter skip that axis (the channel axis)
        return gaussian_filter(img, sigma=(sigma, sigma, 0))
    return gaussian_filter(img, sigma=sigma)


# Create augmented data by rotating, zooming in, blurring and translating the images
augmented_data = image.ImageDataGenerator(
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=20,  # randomly rotate images in the range
        zoom_range=0.1,  # randomly zoom image
        width_shift_range=0.2,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.2,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,
        vertical_flip=False,
        preprocessing_function=blur)  # randomly blur image

# NOTE: augmented_data.fit(x_train) is intentionally not called. fit() only
# computes the statistics needed by featurewise_center,
# featurewise_std_normalization and zca_whitening, all of which are disabled
# above, so calling it would just copy the whole training set for nothing.

# Unshuffled iterator used only to preview one batch of augmented images
# (default batch size of 32, in data set order)
preview_it = augmented_data.flow(x_train, y_train, shuffle=False)
batch_images, batch_labels = next(preview_it)

# Iterator that feeds the training loop. The batch size has to be set here
# because the iterator, not fit(), decides how many samples each batch holds.
# Shuffling is enabled so every epoch sees the samples in a different order.
it = augmented_data.flow(x_train, y_train, batch_size=256, shuffle=True)


# Function for visualising the input data
def visualize_data(images, categories, class_names, rows=3, cols=7):
    """Show the first images of a batch in a grid, labelled with their class.

    At most ``rows * cols`` images are drawn. If the batch holds fewer
    images or labels than that, only the available ones are shown instead
    of indexing past the end of the batch.

    Args:
        images: indexable collection of images.
        categories: indexable collection of per-image score / one-hot
            vectors; the position of the largest entry selects the class.
        class_names: sequence mapping a class index to its display name.
        rows: number of grid rows (default 3).
        cols: number of grid columns (default 7).
    """
    fig = plt.figure(figsize=(14, 6))
    fig.patch.set_facecolor('white')

    # Never draw more cells than there are images and labels
    count = min(rows * cols, len(images), len(categories))
    for i in range(count):
        img = images[i]
        # Drop a trailing single-channel axis so the image is passed to
        # imshow as a plain 2-D array, which every matplotlib version accepts
        if getattr(img, 'ndim', None) == 3 and img.shape[-1] == 1:
            img = img[..., 0]

        plt.subplot(rows, cols, i + 1)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(img)
        class_index = categories[i].argmax()
        plt.xlabel(class_names[class_index])
    plt.show()


# Display names for the ten digit classes: '0' through '9'
class_names = [str(digit) for digit in range(10)]
# Preview the augmented batch with its labels
visualize_data(batch_images, batch_labels, class_names)


# Create the convolution neural network ('CNN') from one ordered list of layers
net = Sequential([
    # --- Convolution stage 1: two 5x5 convolutions with 32 filters each ---
    # The first convolution carries an L2 penalty on its kernel weights
    Conv2D(filters=32, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1),
           kernel_regularizer=l2(0.0005)),
    # Bias is switched off because batch normalisation follows directly.
    # NOTE(review): this layer has no activation, unlike the matching
    # 64-filter layer in stage 2 - confirm whether that is intended.
    Conv2D(filters=32, kernel_size=(5, 5), use_bias=False),
    BatchNormalization(),
    # Halve the spatial resolution, then drop 25% of the activations
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),

    # --- Convolution stage 2: two 3x3 convolutions with 64 filters each ---
    # Again the first convolution is L2-regularised
    Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.0005)),
    Conv2D(64, (3, 3), activation='relu', use_bias=False),
    BatchNormalization(),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(rate=0.25),

    # --- Classifier head ---
    # Flatten the feature maps into a vector for the dense layers
    Flatten(),
    # Three dense layers (256, 128, 84 units), each followed by batch normalisation
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dense(84, activation='relu'),
    BatchNormalization(),
    # Final dropout before the output layer
    Dropout(rate=0.25),
    # Softmax over the 10 digit classes
    Dense(10, activation='softmax'),
])

# Define the optimizer for the CNN.
# `learning_rate` replaces the deprecated `lr` alias, and the no-op
# `decay=0.0` is dropped: newer Keras optimizers reject both old arguments,
# while `learning_rate` is accepted by old and new versions alike.
optimizer = RMSprop(learning_rate=0.001, rho=0.9, epsilon=1e-08)

# Define the conditions under which the learning rate size is reduced:
# when the validation accuracy has not improved for 3 consecutive epochs the
# learning rate is halved, but it is never lowered below 0.00001
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)

# Train the CNN (for 45 epochs)
net.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=["accuracy"])

# `batch_size` is deliberately not passed to fit(): `it` is a batch-yielding
# iterator, and Keras documents that batch_size must not be specified for
# generator / Sequence input (depending on the version it is either ignored
# or raises a ValueError). The batch size is whatever was given to the
# flow(...) call that created `it`.
# NOTE: the test split doubles as the validation set, so the learning-rate
# schedule is driven by the same data the final error is reported on.
history = net.fit(it,
                  validation_data=(x_test, y_test),
                  epochs=45,
                  verbose=2,
                  callbacks=[learning_rate_reduction])

# Once training has finished, write the trained network to disk
# (net.save stores the whole model, not only its weights)
model_path = "CNN_weights_digit_recognition.h5"
net.save(model_path)

# Plot the per-epoch training and validation loss recorded during fitting
plt.figure()
for history_key, legend_label in (('loss', 'training loss'),
                                  ('val_loss', 'validation loss')):
    plt.plot(history.history[history_key], label=legend_label)
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

# Evaluate the trained network on the test images
outputs = net.predict(x_test)
# The predicted digit is the class with the highest output score
labels_predicted = outputs.argmax(axis=1)
# Count the predictions that disagree with the true labels
misclassified = (labels_predicted != labels_test).sum()
error_percentage = 100*misclassified/labels_test.size
print('Percentage misclassified = ', error_percentage)