Cainvas

Handwritten Optical Character Recognition Calculator


The dataset is taken from this link

Importing Necessary Libraries

In [1]:
import numpy as np
import cv2
import os
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Activation, MaxPool2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import seaborn as sn
import matplotlib.pyplot as plt
import pandas as pd
import imutils
from imutils.contours import sort_contours
In [2]:
!wget https://cainvas-static.s3.amazonaws.com/media/user_data/Yuvnish17/data.zip
!unzip -qo data.zip
--2021-07-12 11:32:37--  https://cainvas-static.s3.amazonaws.com/media/user_data/Yuvnish17/data.zip
Resolving cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)... 52.219.160.19
Connecting to cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)|52.219.160.19|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 28437489 (27M) [application/x-zip-compressed]
Saving to: ‘data.zip’

data.zip            100%[===================>]  27.12M  99.0MB/s    in 0.3s    

2021-07-12 11:32:37 (99.0 MB/s) - ‘data.zip’ saved [28437489/28437489]

Loading the Dataset

In [3]:
x = []
y = []
datadir = 'data/dataset'
for folder in os.listdir(datadir):
    path = os.path.join(datadir, folder)
    for images in os.listdir(path):
        img = cv2.imread(os.path.join(path, images))
        x.append(img)
        y.append(folder)
        
print(len(x))
print(len(y))
print(f'labels : {list(set(y))}')
7600
7600
labels : ['mul', 'div', '0', '4', '1', '2', 'sub', 'add', '8', '5', '9', '7', '3', '6']

Visualizing Images in the Dataset

In [4]:
figure = plt.figure(figsize=(10, 10))
j = 0
for i in list(set(y)):
    idx = y.index(i)
    img = x[idx]
    img = cv2.resize(img, (256, 256))
    figure.add_subplot(5, 5, j+1)
    plt.imshow(img)
    plt.axis('off')
    plt.title(i)
    j += 1

Data Distribution of the Dataset

In [5]:
unique, count = np.unique(y, return_counts=True)
figure = plt.figure(figsize=(20, 10))
sn.barplot(unique, count).set_title('Number of Images per Category')
plt.show()
/opt/tljh/user/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning

As can been seen, the dataset is not much imbalanced. So balancing is not required here much.

Preprocessing the Data

In [6]:
X = []
for i in range(len(x)):
#     print(i)
    img = x[i]
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    threshold_image = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY_INV|cv2.THRESH_OTSU)[1]
    threshold_image = cv2.resize(threshold_image, (32, 32))
    X.append(threshold_image)
print(len(X))
7600
In [7]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
print(len(y))
7600
In [8]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2)

Data Distribution in Train and Test Set

In [9]:
unique_train, count_train = np.unique(Y_train, return_counts=True)
figure = plt.figure(figsize=(20, 10))
sn.barplot(unique_train, count_train).set_title('Number of Images per category in Train Set')
plt.show()
/opt/tljh/user/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning
In [10]:
unique_test, count_test = np.unique(Y_test, return_counts=True)
figure = plt.figure(figsize=(20, 10))
sn.barplot(unique_test, count_test).set_title('Number of Images per category in Test Set')
plt.show()
/opt/tljh/user/lib/python3.7/site-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
  FutureWarning

Defining the Model

In [11]:
X_train = np.array(X_train)
X_test = np.array(X_test)
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)

Y_train = to_categorical(Y_train)
Y_test = to_categorical(Y_test)
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)
X_train = X_train/255.
X_test = X_test/255.

print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)
(6080, 32, 32, 1)
(1520, 32, 32, 1)
(6080, 14)
(1520, 14)
In [12]:
def math_symbol_and_digits_recognition(input_shape=(32, 32, 1)):
    regularizer = l2(0.01)
    model = Sequential()
    model.add(Input(shape=input_shape))
    model.add(Conv2D(32, (3, 3), strides=(1, 1), padding='same', 
                     kernel_initializer=glorot_uniform(seed=0), 
                     name='conv1', activity_regularizer=regularizer))
    model.add(Activation(activation='relu', name='act1'))
    model.add(MaxPool2D((2, 2), strides=(2, 2)))
    model.add(Conv2D(32, (3, 3), strides=(1, 1), padding='same', 
                     kernel_initializer=glorot_uniform(seed=0), 
                     name='conv2', activity_regularizer=regularizer))
    model.add(Activation(activation='relu', name='act2'))
    model.add(MaxPool2D((2, 2), strides=(2, 2)))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), padding='same', 
                     kernel_initializer=glorot_uniform(seed=0), 
                     name='conv3', activity_regularizer=regularizer))
    model.add(Activation(activation='relu', name='act3'))
    model.add(MaxPool2D((2, 2), strides=(2, 2)))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(120, activation='relu', kernel_initializer=glorot_uniform(seed=0), name='fc1'))
    model.add(Dense(84, activation='relu', kernel_initializer=glorot_uniform(seed=0), name='fc2'))
    model.add(Dense(14, activation='softmax', kernel_initializer=glorot_uniform(seed=0), name='fc3'))
    
    optimizer = Adam()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model
In [13]:
model = math_symbol_and_digits_recognition(input_shape=(32, 32, 1))
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv1 (Conv2D)               (None, 32, 32, 32)        320       
_________________________________________________________________
act1 (Activation)            (None, 32, 32, 32)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 32)        0         
_________________________________________________________________
conv2 (Conv2D)               (None, 16, 16, 32)        9248      
_________________________________________________________________
act2 (Activation)            (None, 16, 16, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
conv3 (Conv2D)               (None, 8, 8, 64)          18496     
_________________________________________________________________
act3 (Activation)            (None, 8, 8, 64)          0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 4, 4, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1024)              0         
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0         
_________________________________________________________________
fc1 (Dense)                  (None, 120)               123000    
_________________________________________________________________
fc2 (Dense)                  (None, 84)                10164     
_________________________________________________________________
fc3 (Dense)                  (None, 14)                1190      
=================================================================
Total params: 162,418
Trainable params: 162,418
Non-trainable params: 0
_________________________________________________________________

Training the Model

In [14]:
def step_decay(epoch):
    initial_learning_rate = 0.001
    dropEvery = 10
    factor = 0.5
    lr = initial_learning_rate*(factor**np.floor((1 + epoch)/dropEvery))
    return float(lr)

checkpoint = ModelCheckpoint('maths_symbol_and_digits_recognition.h5', 
                             monitor='val_loss', save_best_only=True, 
                             verbose=1, mode='min')

callbacks = [checkpoint, LearningRateScheduler(step_decay)]
In [15]:
aug = ImageDataGenerator(zoom_range=0.1,
#                          rotation_range=5,
                         width_shift_range=0.05,
                         height_shift_range=0.05)
hist = model.fit(aug.flow(X_train, Y_train, batch_size=128), batch_size=128, epochs=100, validation_data=(X_test, Y_test))
# hist2 = model2.fit(X_train, Y_train, batch_size=128, epochs=100, validation_data=(X_test, Y_test), callbacks=callbacks)
Epoch 1/100
48/48 [==============================] - 1s 27ms/step - loss: 2.7972 - accuracy: 0.2403 - val_loss: 2.2951 - val_accuracy: 0.3717
Epoch 2/100
48/48 [==============================] - 1s 22ms/step - loss: 1.7852 - accuracy: 0.5433 - val_loss: 1.5321 - val_accuracy: 0.6737
Epoch 3/100
48/48 [==============================] - 1s 22ms/step - loss: 1.2530 - accuracy: 0.6809 - val_loss: 1.3674 - val_accuracy: 0.6605
Epoch 4/100
48/48 [==============================] - 1s 22ms/step - loss: 0.9747 - accuracy: 0.7623 - val_loss: 0.9905 - val_accuracy: 0.7895
Epoch 5/100
48/48 [==============================] - 1s 22ms/step - loss: 0.8385 - accuracy: 0.7980 - val_loss: 1.1084 - val_accuracy: 0.7421
Epoch 6/100
48/48 [==============================] - 1s 22ms/step - loss: 0.7769 - accuracy: 0.8137 - val_loss: 0.7589 - val_accuracy: 0.8480
Epoch 7/100
48/48 [==============================] - 1s 22ms/step - loss: 0.6818 - accuracy: 0.8418 - val_loss: 0.8013 - val_accuracy: 0.8276
Epoch 8/100
48/48 [==============================] - 1s 22ms/step - loss: 0.6159 - accuracy: 0.8623 - val_loss: 0.7431 - val_accuracy: 0.8507
Epoch 9/100
48/48 [==============================] - 1s 22ms/step - loss: 0.5731 - accuracy: 0.8714 - val_loss: 0.7076 - val_accuracy: 0.8539
Epoch 10/100
48/48 [==============================] - 1s 22ms/step - loss: 0.5382 - accuracy: 0.8808 - val_loss: 0.7125 - val_accuracy: 0.8539
Epoch 11/100
48/48 [==============================] - 1s 22ms/step - loss: 0.4859 - accuracy: 0.8942 - val_loss: 0.4906 - val_accuracy: 0.9296
Epoch 12/100
48/48 [==============================] - 1s 22ms/step - loss: 0.4751 - accuracy: 0.8975 - val_loss: 0.5505 - val_accuracy: 0.8967
Epoch 13/100
48/48 [==============================] - 1s 22ms/step - loss: 0.4447 - accuracy: 0.9081 - val_loss: 0.5212 - val_accuracy: 0.9158
Epoch 14/100
48/48 [==============================] - 1s 22ms/step - loss: 0.4146 - accuracy: 0.9156 - val_loss: 0.5727 - val_accuracy: 0.8914
Epoch 15/100
48/48 [==============================] - 1s 22ms/step - loss: 0.3782 - accuracy: 0.9225 - val_loss: 0.5948 - val_accuracy: 0.8671
Epoch 16/100
48/48 [==============================] - 1s 22ms/step - loss: 0.3741 - accuracy: 0.9212 - val_loss: 0.4590 - val_accuracy: 0.9132
Epoch 17/100
48/48 [==============================] - 1s 22ms/step - loss: 0.3334 - accuracy: 0.9329 - val_loss: 0.3933 - val_accuracy: 0.9336
Epoch 18/100
48/48 [==============================] - 1s 22ms/step - loss: 0.3364 - accuracy: 0.9291 - val_loss: 0.3859 - val_accuracy: 0.9388
Epoch 19/100
48/48 [==============================] - 1s 22ms/step - loss: 0.3214 - accuracy: 0.9331 - val_loss: 0.4345 - val_accuracy: 0.9158
Epoch 20/100
48/48 [==============================] - 1s 22ms/step - loss: 0.3035 - accuracy: 0.9373 - val_loss: 0.3399 - val_accuracy: 0.9487
Epoch 21/100
48/48 [==============================] - 1s 22ms/step - loss: 0.3273 - accuracy: 0.9281 - val_loss: 0.4010 - val_accuracy: 0.9342
Epoch 22/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2893 - accuracy: 0.9403 - val_loss: 0.3738 - val_accuracy: 0.9349
Epoch 23/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2652 - accuracy: 0.9492 - val_loss: 0.2777 - val_accuracy: 0.9684
Epoch 24/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2684 - accuracy: 0.9452 - val_loss: 0.2756 - val_accuracy: 0.9684
Epoch 25/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2593 - accuracy: 0.9475 - val_loss: 0.3006 - val_accuracy: 0.9553
Epoch 26/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2488 - accuracy: 0.9472 - val_loss: 0.2982 - val_accuracy: 0.9572
Epoch 27/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2412 - accuracy: 0.9515 - val_loss: 0.2686 - val_accuracy: 0.9599
Epoch 28/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2481 - accuracy: 0.9479 - val_loss: 0.3162 - val_accuracy: 0.9507
Epoch 29/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2430 - accuracy: 0.9510 - val_loss: 0.2566 - val_accuracy: 0.9664
Epoch 30/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2248 - accuracy: 0.9569 - val_loss: 0.2730 - val_accuracy: 0.9572
Epoch 31/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2240 - accuracy: 0.9512 - val_loss: 0.3009 - val_accuracy: 0.9559
Epoch 32/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2034 - accuracy: 0.9604 - val_loss: 0.2348 - val_accuracy: 0.9691
Epoch 33/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2182 - accuracy: 0.9507 - val_loss: 0.3139 - val_accuracy: 0.9493
Epoch 34/100
48/48 [==============================] - 1s 23ms/step - loss: 0.2107 - accuracy: 0.9600 - val_loss: 0.2498 - val_accuracy: 0.9651
Epoch 35/100
48/48 [==============================] - 1s 22ms/step - loss: 0.2086 - accuracy: 0.9566 - val_loss: 0.2422 - val_accuracy: 0.9651
Epoch 36/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1964 - accuracy: 0.9617 - val_loss: 0.2049 - val_accuracy: 0.9776
Epoch 37/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1899 - accuracy: 0.9625 - val_loss: 0.2002 - val_accuracy: 0.9770
Epoch 38/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1845 - accuracy: 0.9627 - val_loss: 0.2493 - val_accuracy: 0.9618
Epoch 39/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1812 - accuracy: 0.9638 - val_loss: 0.1878 - val_accuracy: 0.9757
Epoch 40/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1837 - accuracy: 0.9661 - val_loss: 0.2144 - val_accuracy: 0.9737
Epoch 41/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1797 - accuracy: 0.9648 - val_loss: 0.2159 - val_accuracy: 0.9664
Epoch 42/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1638 - accuracy: 0.9691 - val_loss: 0.2136 - val_accuracy: 0.9678
Epoch 43/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1615 - accuracy: 0.9671 - val_loss: 0.1952 - val_accuracy: 0.9757
Epoch 44/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1622 - accuracy: 0.9697 - val_loss: 0.2395 - val_accuracy: 0.9605
Epoch 45/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1875 - accuracy: 0.9581 - val_loss: 0.2131 - val_accuracy: 0.9678
Epoch 46/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1618 - accuracy: 0.9706 - val_loss: 0.1830 - val_accuracy: 0.9770
Epoch 47/100
48/48 [==============================] - 1s 23ms/step - loss: 0.1574 - accuracy: 0.9671 - val_loss: 0.1894 - val_accuracy: 0.9724
Epoch 48/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1501 - accuracy: 0.9712 - val_loss: 0.1694 - val_accuracy: 0.9763
Epoch 49/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1502 - accuracy: 0.9727 - val_loss: 0.1761 - val_accuracy: 0.9770
Epoch 50/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1489 - accuracy: 0.9720 - val_loss: 0.1679 - val_accuracy: 0.9789
Epoch 51/100
48/48 [==============================] - 1s 23ms/step - loss: 0.1509 - accuracy: 0.9671 - val_loss: 0.1638 - val_accuracy: 0.9776
Epoch 52/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1463 - accuracy: 0.9684 - val_loss: 0.1593 - val_accuracy: 0.9796
Epoch 53/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1512 - accuracy: 0.9709 - val_loss: 0.1630 - val_accuracy: 0.9763
Epoch 54/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1455 - accuracy: 0.9720 - val_loss: 0.1876 - val_accuracy: 0.9704
Epoch 55/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1513 - accuracy: 0.9694 - val_loss: 0.1766 - val_accuracy: 0.9763
Epoch 56/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1460 - accuracy: 0.9706 - val_loss: 0.1758 - val_accuracy: 0.9743
Epoch 57/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1332 - accuracy: 0.9762 - val_loss: 0.1676 - val_accuracy: 0.9763
Epoch 58/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1388 - accuracy: 0.9737 - val_loss: 0.1635 - val_accuracy: 0.9776
Epoch 59/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1332 - accuracy: 0.9720 - val_loss: 0.1555 - val_accuracy: 0.9783
Epoch 60/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1294 - accuracy: 0.9750 - val_loss: 0.1744 - val_accuracy: 0.9730
Epoch 61/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1339 - accuracy: 0.9697 - val_loss: 0.1486 - val_accuracy: 0.9803
Epoch 62/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1249 - accuracy: 0.9757 - val_loss: 0.1465 - val_accuracy: 0.9796
Epoch 63/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1249 - accuracy: 0.9763 - val_loss: 0.1331 - val_accuracy: 0.9829
Epoch 64/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1200 - accuracy: 0.9796 - val_loss: 0.1443 - val_accuracy: 0.9796
Epoch 65/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1414 - accuracy: 0.9696 - val_loss: 0.1526 - val_accuracy: 0.9803
Epoch 66/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1285 - accuracy: 0.9742 - val_loss: 0.1778 - val_accuracy: 0.9724
Epoch 67/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1114 - accuracy: 0.9781 - val_loss: 0.1535 - val_accuracy: 0.9783
Epoch 68/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1119 - accuracy: 0.9788 - val_loss: 0.1682 - val_accuracy: 0.9730
Epoch 69/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1287 - accuracy: 0.9730 - val_loss: 0.1342 - val_accuracy: 0.9829
Epoch 70/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1309 - accuracy: 0.9752 - val_loss: 0.1366 - val_accuracy: 0.9783
Epoch 71/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1171 - accuracy: 0.9766 - val_loss: 0.1576 - val_accuracy: 0.9737
Epoch 72/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1264 - accuracy: 0.9771 - val_loss: 0.1527 - val_accuracy: 0.9796
Epoch 73/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1139 - accuracy: 0.9793 - val_loss: 0.1342 - val_accuracy: 0.9809
Epoch 74/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1102 - accuracy: 0.9789 - val_loss: 0.1786 - val_accuracy: 0.9678
Epoch 75/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0942 - accuracy: 0.9842 - val_loss: 0.1228 - val_accuracy: 0.9862
Epoch 76/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1099 - accuracy: 0.9801 - val_loss: 0.1249 - val_accuracy: 0.9822
Epoch 77/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1074 - accuracy: 0.9791 - val_loss: 0.1303 - val_accuracy: 0.9803
Epoch 78/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1080 - accuracy: 0.9789 - val_loss: 0.1286 - val_accuracy: 0.9836
Epoch 79/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1112 - accuracy: 0.9778 - val_loss: 0.1208 - val_accuracy: 0.9829
Epoch 80/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1065 - accuracy: 0.9816 - val_loss: 0.1510 - val_accuracy: 0.9737
Epoch 81/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1042 - accuracy: 0.9798 - val_loss: 0.1223 - val_accuracy: 0.9822
Epoch 82/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0982 - accuracy: 0.9816 - val_loss: 0.1129 - val_accuracy: 0.9829
Epoch 83/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1111 - accuracy: 0.9780 - val_loss: 0.1426 - val_accuracy: 0.9809
Epoch 84/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1064 - accuracy: 0.9796 - val_loss: 0.1409 - val_accuracy: 0.9809
Epoch 85/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1066 - accuracy: 0.9783 - val_loss: 0.1235 - val_accuracy: 0.9855
Epoch 86/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1096 - accuracy: 0.9776 - val_loss: 0.1929 - val_accuracy: 0.9658
Epoch 87/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1134 - accuracy: 0.9783 - val_loss: 0.1249 - val_accuracy: 0.9836
Epoch 88/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0994 - accuracy: 0.9806 - val_loss: 0.1368 - val_accuracy: 0.9829
Epoch 89/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0901 - accuracy: 0.9831 - val_loss: 0.1055 - val_accuracy: 0.9868
Epoch 90/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1062 - accuracy: 0.9776 - val_loss: 0.2606 - val_accuracy: 0.9480
Epoch 91/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1020 - accuracy: 0.9808 - val_loss: 0.1266 - val_accuracy: 0.9822
Epoch 92/100
48/48 [==============================] - 1s 22ms/step - loss: 0.1003 - accuracy: 0.9809 - val_loss: 0.1136 - val_accuracy: 0.9875
Epoch 93/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0924 - accuracy: 0.9842 - val_loss: 0.1238 - val_accuracy: 0.9816
Epoch 94/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0998 - accuracy: 0.9817 - val_loss: 0.1140 - val_accuracy: 0.9862
Epoch 95/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0952 - accuracy: 0.9831 - val_loss: 0.1056 - val_accuracy: 0.9842
Epoch 96/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0870 - accuracy: 0.9850 - val_loss: 0.1085 - val_accuracy: 0.9868
Epoch 97/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0904 - accuracy: 0.9831 - val_loss: 0.1485 - val_accuracy: 0.9730
Epoch 98/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0991 - accuracy: 0.9789 - val_loss: 0.1115 - val_accuracy: 0.9862
Epoch 99/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0919 - accuracy: 0.9852 - val_loss: 0.1203 - val_accuracy: 0.9822
Epoch 100/100
48/48 [==============================] - 1s 22ms/step - loss: 0.0925 - accuracy: 0.9826 - val_loss: 0.1556 - val_accuracy: 0.9730

Loss and Accuracy Plot

In [16]:
figure = plt.figure(figsize=(10, 10))
plt.plot(hist.history['accuracy'], label='Train Set Accuracy')
plt.plot(hist.history['val_accuracy'], label='Test Set Accuracy')
plt.title('Accuracy Plot')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='upper right')
plt.show()

figure2 = plt.figure(figsize=(10, 10))
plt.plot(hist.history['loss'], label='Train Set Loss')
plt.plot(hist.history['val_loss'], label='Test Set Loss')
plt.title('Loss Plot')
plt.xlabel('Epochs')
plt.ylabel('Loss Value')
plt.legend(loc='upper right')
plt.show()

Classification Report

In [17]:
ypred = model.predict(X_test)
ypred = np.argmax(ypred, axis=1)
Y_test_hat = np.argmax(Y_test, axis=1)
print(classification_report(Y_test_hat, ypred))
              precision    recall  f1-score   support

           0       0.94      0.97      0.96       113
           1       0.99      0.93      0.96       115
           2       0.99      0.99      0.99        97
           3       1.00      0.97      0.98       116
           4       0.94      0.99      0.97       101
           5       1.00      0.85      0.92        82
           6       0.89      0.99      0.94       120
           7       0.93      1.00      0.97        86
           8       0.98      0.97      0.97       127
           9       0.98      1.00      0.99       102
          10       1.00      0.96      0.98       113
          11       1.00      1.00      1.00        84
          12       0.99      0.99      0.99       114
          13       1.00      0.99      1.00       150

    accuracy                           0.97      1520
   macro avg       0.97      0.97      0.97      1520
weighted avg       0.97      0.97      0.97      1520

Confusion Matrix

In [18]:
matrix = confusion_matrix(Y_test_hat, ypred)
df_cm = pd.DataFrame(matrix, index=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], 
                     columns=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
figure = plt.figure(figsize=(20, 10))
sn.heatmap(df_cm, annot=True, fmt='d')
Out[18]:
<AxesSubplot:>

Saving the Model

In [19]:
model.save('maths_symbol_and_digit_recognition.h5')

Testing the Model

In [20]:
def test_pipeline(image_path):
    img = cv2.imread(image_path)
    img = cv2.resize(img, (800, 800))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # blurred = cv2.GaussianBlur(img_gray, (3, 3), 0)
    edged = cv2.Canny(img_gray, 30, 150)
    contours = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    contours = sort_contours(contours, method="left-to-right")[0]
    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'add', 'div', 'mul', 'sub']

    for c in contours:
        (x, y, w, h) = cv2.boundingRect(c)
        if 20<=w and 30<=h:
            roi = img_gray[y:y+h, x:x+w]
            thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
            (th, tw) = thresh.shape
            if tw > th:
                thresh = imutils.resize(thresh, width=32)
            if th > tw:
                thresh = imutils.resize(thresh, height=32)
            (th, tw) = thresh.shape
            dx = int(max(0, 32 - tw)/2.0)
            dy = int(max(0, 32 - th) / 2.0)
            padded = cv2.copyMakeBorder(thresh, top=dy, bottom=dy, left=dx, right=dx, borderType=cv2.BORDER_CONSTANT,
                                       value=(0, 0, 0))
            padded = cv2.resize(padded, (32, 32))
            padded = np.array(padded)
            padded = padded/255.
            padded = np.expand_dims(padded, axis=0)
            padded = np.expand_dims(padded, axis=-1)
            pred = model.predict(padded)
            pred = np.argmax(pred, axis=1)
            label = labels[pred[0]]
            cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 2)
            cv2.putText(img, label, (x-5, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

    figure = plt.figure(figsize=(10, 10))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.axis('off')
    plt.show()
In [22]:
test_pipeline('data/test.jpg')

Pipeline for Expression Solving

For example the expression to be solved is 22+16x16. As the current model doesn't recognizes brackets, so this expression is interpreted as 22+(16x16) and similar convention is used for the pipeline.

In [23]:
def test_pipeline_equation(image_path):
    chars = []
    img = cv2.imread(image_path)
    img = cv2.resize(img, (800, 800))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # blurred = cv2.GaussianBlur(img_gray, (3, 3), 0)
    edged = cv2.Canny(img_gray, 30, 150)
    contours = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    contours = sort_contours(contours, method="left-to-right")[0]
    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'add', 'div', 'mul', 'sub']

    for c in contours:
        (x, y, w, h) = cv2.boundingRect(c)
        if 20<=w and 30<=h:
            roi = img_gray[y:y+h, x:x+w]
            thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
            (th, tw) = thresh.shape
            if tw > th:
                thresh = imutils.resize(thresh, width=32)
            if th > tw:
                thresh = imutils.resize(thresh, height=32)
            (th, tw) = thresh.shape
            dx = int(max(0, 32 - tw)/2.0)
            dy = int(max(0, 32 - th) / 2.0)
            padded = cv2.copyMakeBorder(thresh, top=dy, bottom=dy, left=dx, right=dx, borderType=cv2.BORDER_CONSTANT,
                                       value=(0, 0, 0))
            padded = cv2.resize(padded, (32, 32))
            padded = np.array(padded)
            padded = padded/255.
            padded = np.expand_dims(padded, axis=0)
            padded = np.expand_dims(padded, axis=-1)
            pred = model.predict(padded)
            pred = np.argmax(pred, axis=1)
    #         print(pred)
            label = labels[pred[0]]
            chars.append(label)
            cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 2)
            cv2.putText(img, label, (x-5, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

    figure = plt.figure(figsize=(10, 10))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    
    e = ''
    for i in chars:
        if i=='add':
            e += '+'
        elif i=='sub':
            e += '-'
        elif i=='mul':
            e += '*'
        elif i=='div':
            e += '/'
        else:
            e += i
    v = eval(e)
    print('Value of the expression {} : {}'.format(e, v)) 
In [24]:
test_pipeline_equation('data/test_equation4.jpg')
Value of the expression 22+16*16 : 278
In [ ]: