Handwritten Optical Character Recognition Calculator¶
Credit: AITS Cainvas Community
Photo by Pavelas Laptevas for Cub Studio on Dribbble
Importing Necessary Libraries¶
In [1]:
import numpy as np
import cv2
import os
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Activation, MaxPool2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import seaborn as sn
import matplotlib.pyplot as plt
import pandas as pd
import imutils
from imutils.contours import sort_contours
In [2]:
# Download the dataset archive and unpack it into the working directory.
# unzip flags: -q = quiet, -o = overwrite existing files without prompting.
!wget https://cainvas-static.s3.amazonaws.com/media/user_data/Yuvnish17/data.zip
!unzip -qo data.zip
Loading the Dataset¶
In [3]:
# Load every readable image under `datadir`; the name of the sub-folder an
# image lives in is used as its class label.
x = []
y = []
datadir = 'data/dataset'
# os.listdir returns entries in arbitrary order; sorting makes the load order
# (and therefore everything derived from it) identical on every run.
for folder in sorted(os.listdir(datadir)):
    path = os.path.join(datadir, folder)
    if not os.path.isdir(path):
        # A stray file next to the class folders is not a class.
        continue
    for images in sorted(os.listdir(path)):
        image_file = os.path.join(path, images)
        img = cv2.imread(image_file)
        if img is None:
            # cv2.imread returns None instead of raising when it cannot decode
            # a file; keeping it would crash the later cv2.cvtColor call.
            print(f'skipping unreadable file : {image_file}')
            continue
        x.append(img)
        y.append(folder)
print(len(x))
print(len(y))
print(f'labels : {sorted(set(y))}')
Visualizing Images in the Dataset¶
In [4]:
# Show the first image of every class in a 5x5 grid of panels.
figure = plt.figure(figsize=(10, 10))
# sorted() keeps the panel order stable between runs (set iteration order is not).
for j, i in enumerate(sorted(set(y))):
    idx = y.index(i)  # position of the first sample carrying this label
    img = cv2.resize(x[idx], (256, 256))
    figure.add_subplot(5, 5, j + 1)
    # cv2.imread yields BGR channel order while matplotlib interprets arrays as
    # RGB, so convert before displaying to avoid swapped colours.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.title(i)
plt.show()
Data Distribution of the Dataset¶
In [5]:
# Bar chart of the number of images available for each label.
unique, count = np.unique(y, return_counts=True)
figure = plt.figure(figsize=(20, 10))
# x and y must be passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally and raises a TypeError for barplot(unique, count).
sn.barplot(x=unique, y=count).set_title('Number of Images per Category')
plt.show()
As can be seen, the dataset is not heavily imbalanced, so no class balancing is needed here.
Preprocessing the Data¶
In [6]:
# Convert every loaded BGR sample into a 32x32 inverted binary image:
# grayscale -> Otsu threshold (inverted) -> resize.
X = []
for sample in x:
    gray = cv2.cvtColor(sample, cv2.COLOR_BGR2GRAY)
    # The threshold value 0 is a placeholder; THRESH_OTSU picks the actual one.
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    X.append(cv2.resize(binary, (32, 32)))
print(len(X))
In [7]:
# Map the labels in `y` to integer class ids.
# `y` is rebound to the encoded ids, so re-running this cell refits the encoder
# on the integers instead of the original labels.
label_encoder = LabelEncoder().fit(y)
y = label_encoder.transform(y)
print(len(y))
In [8]:
# Hold out 20% of the samples for evaluation.
# random_state pins the shuffle so the split (and every metric computed from it)
# is reproducible; stratify=y keeps the class proportions equal in both parts.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42)
Data Distribution in Train and Test Set¶
In [9]:
# Bar chart of the number of training samples per class id.
unique_train, count_train = np.unique(Y_train, return_counts=True)
figure = plt.figure(figsize=(20, 10))
# Keyword x/y: seaborn >= 0.12 no longer accepts them positionally.
sn.barplot(x=unique_train, y=count_train).set_title('Number of Images per category in Train Set')
plt.show()
In [10]:
# Bar chart of the number of test samples per class id.
unique_test, count_test = np.unique(Y_test, return_counts=True)
figure = plt.figure(figsize=(20, 10))
# Keyword x/y: seaborn >= 0.12 no longer accepts them positionally.
sn.barplot(x=unique_test, y=count_test).set_title('Number of Images per category in Test Set')
plt.show()
Defining the Model¶
In [11]:
# Turn the split lists into model-ready arrays: images get a trailing channel
# axis and are scaled to [0, 1]; labels are one-hot encoded.
# NOTE: this cell rebinds its own inputs, so running it twice on the same kernel
# rescales and re-expands the already processed arrays.

# Fix the one-hot width from the fitted encoder. Without it to_categorical
# infers the width from the largest id present in each array separately, so the
# two label matrices could end up with different widths.
num_classes = len(label_encoder.classes_)

X_train = np.array(X_train)
X_test = np.array(X_test)
Y_train = to_categorical(np.array(Y_train), num_classes=num_classes)
Y_test = to_categorical(np.array(Y_test), num_classes=num_classes)
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)
# Pixel values are 0/255 after thresholding; bring them to 0/1.
X_train = X_train/255.
X_test = X_test/255.
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)
In [12]:
def math_symbol_and_digits_recognition(input_shape=(32, 32, 1), num_classes=14, l2_factor=0.01):
    """Build and compile the CNN used to classify digits and math operators.

    Architecture: three Conv2D(3x3, same padding) -> ReLU -> MaxPool(2x2)
    stages with 32, 32 and 64 filters, then Flatten -> Dropout(0.5) ->
    Dense(120, relu) -> Dense(84, relu) -> Dense(num_classes, softmax).

    Args:
        input_shape: shape of one input sample, without the batch axis.
        num_classes: number of units in the softmax output layer. Defaults to
            14, the value that was previously hard-coded.
        l2_factor: factor of the L2 activity regularizer attached to every
            convolution. Defaults to 0.01, the value that was previously
            hard-coded.

    Returns:
        A Sequential model compiled with Adam (default settings),
        categorical cross-entropy loss and the accuracy metric.
    """
    # NOTE(review): this is an *activity* regularizer, i.e. it penalises the
    # convolution outputs rather than the kernels - confirm that
    # kernel_regularizer was not what was intended.
    regularizer = l2(l2_factor)

    model = Sequential()
    model.add(Input(shape=input_shape))

    # The three convolutional stages differ only in filter count and layer name.
    for stage, filters in enumerate((32, 32, 64), start=1):
        model.add(Conv2D(filters, (3, 3), strides=(1, 1), padding='same',
                         kernel_initializer=glorot_uniform(seed=0),
                         name=f'conv{stage}', activity_regularizer=regularizer))
        model.add(Activation(activation='relu', name=f'act{stage}'))
        model.add(MaxPool2D((2, 2), strides=(2, 2)))

    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(120, activation='relu', kernel_initializer=glorot_uniform(seed=0), name='fc1'))
    model.add(Dense(84, activation='relu', kernel_initializer=glorot_uniform(seed=0), name='fc2'))
    model.add(Dense(num_classes, activation='softmax', kernel_initializer=glorot_uniform(seed=0), name='fc3'))

    optimizer = Adam()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model
In [13]:
# Build the compiled network for 32x32 single-channel inputs and print its
# layer-by-layer summary. `model` is the global used by the later cells.
model = math_symbol_and_digits_recognition(input_shape=(32, 32, 1))
model.summary()
Training the Model¶
In [14]:
def step_decay(epoch):
    """Return the learning rate to use for the given (0-based) epoch index.

    Starts at 0.001 and is multiplied by 0.5 once for every completed group of
    10 epochs, counted as floor((epoch + 1) / 10). The first halving therefore
    takes effect at epoch index 9.
    """
    base_rate, epochs_per_drop, decay = 0.001, 10, 0.5
    drops_so_far = (epoch + 1) // epochs_per_drop
    return float(base_rate * decay ** drops_so_far)
# Keep only the best weights seen so far, judged by the lowest validation loss.
checkpoint = ModelCheckpoint(
    filepath='maths_symbol_and_digits_recognition.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
# Checkpointing plus the stepwise learning-rate schedule defined by step_decay.
callbacks = [checkpoint, LearningRateScheduler(step_decay)]
In [15]:
# Light augmentation (small zoom and shifts) applied on the fly to the training
# images only; validation runs on the untouched test arrays.
aug = ImageDataGenerator(zoom_range=0.1,
                         width_shift_range=0.05,
                         height_shift_range=0.05)

# The batch size is set on aug.flow(); Keras' fit() documentation says not to
# pass batch_size again when the input is a generator. `callbacks` is forwarded
# so the checkpoint and learning-rate schedule defined for this training
# actually take effect.
hist = model.fit(aug.flow(X_train, Y_train, batch_size=128),
                 epochs=100,
                 validation_data=(X_test, Y_test),
                 callbacks=callbacks)
Loss and Accuracy Plot¶
In [16]:
# Accuracy per epoch: training curve against the held-out (validation) curve.
figure = plt.figure(figsize=(10, 10))
accuracy_axes = figure.add_subplot(111)
accuracy_axes.plot(hist.history['accuracy'], label='Train Set Accuracy')
accuracy_axes.plot(hist.history['val_accuracy'], label='Test Set Accuracy')
accuracy_axes.set_title('Accuracy Plot')
accuracy_axes.set_xlabel('Epochs')
accuracy_axes.set_ylabel('Accuracy')
accuracy_axes.legend(loc='upper right')
plt.show()

# Loss per epoch, same layout as the accuracy figure.
figure2 = plt.figure(figsize=(10, 10))
loss_axes = figure2.add_subplot(111)
loss_axes.plot(hist.history['loss'], label='Train Set Loss')
loss_axes.plot(hist.history['val_loss'], label='Test Set Loss')
loss_axes.set_title('Loss Plot')
loss_axes.set_xlabel('Epochs')
loss_axes.set_ylabel('Loss Value')
loss_axes.legend(loc='upper right')
plt.show()
Classification Report¶
In [17]:
# Predicted class id per test sample: index of the highest softmax score.
ypred = model.predict(X_test).argmax(axis=1)
# Despite the "_hat" suffix these are the TRUE class ids, recovered from the
# one-hot encoded test labels.
Y_test_hat = Y_test.argmax(axis=1)
print(classification_report(Y_test_hat, ypred))
Confusion Matrix¶
In [18]:
# Confusion matrix over the 14 class ids (rows: true class, columns: predicted).
class_ids = list(range(14))
# Passing labels= fixes the matrix at 14x14 even when a class is missing from
# both vectors; otherwise the matrix shrinks and the fixed 14-entry
# index/columns below no longer match its shape.
matrix = confusion_matrix(Y_test_hat, ypred, labels=class_ids)
df_cm = pd.DataFrame(matrix, index=class_ids, columns=class_ids)
figure = plt.figure(figsize=(20, 10))
ax = sn.heatmap(df_cm, annot=True, fmt='d')
ax.set_xlabel('Predicted class id')
ax.set_ylabel('True class id')
plt.show()
Out[18]:
Saving the Model¶
In [19]:
# Write the model in its final (last-epoch) state to an HDF5 (.h5) file.
# The file name ("..._digit_...") differs from the ModelCheckpoint target
# ("..._digits_..."), so the two files do not overwrite each other.
model.save('maths_symbol_and_digit_recognition.h5')
Testing the Model¶
In [20]:
def test_pipeline(image_path):
    """Detect symbols in an image, classify each one and show the annotated image.

    The image is resized to 800x800, edges are extracted with Canny, and every
    external contour whose bounding box is at least 20 px wide and 30 px high is
    cropped, binarised, fitted into a 32x32 frame and classified with the
    notebook-level `model`. Each accepted box is drawn on the image together
    with its predicted label, and the result is displayed with matplotlib.

    Args:
        image_path: path of the image file to analyse.

    Raises:
        FileNotFoundError: if OpenCV cannot read an image from `image_path`.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None; fail here with a clear
        # message instead of an opaque OpenCV error in cv2.resize.
        raise FileNotFoundError('Could not read an image from {!r}'.format(image_path))
    img = cv2.resize(img, (800, 800))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edged = cv2.Canny(img_gray, 30, 150)
    contours = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    if len(contours) > 0:
        # sort_contours unpacks a zip over the contours and raises ValueError
        # when there are none, so only sort a non-empty result.
        contours = sort_contours(contours, method="left-to-right")[0]
    # Index -> label name. Presumably the sorted class order produced by the
    # LabelEncoder during training - verify against label_encoder.classes_.
    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'add', 'div', 'mul', 'sub']

    for c in contours:
        (x, y, w, h) = cv2.boundingRect(c)
        if w < 20 or h < 30:
            # Too small to be treated as a symbol.
            continue
        roi = img_gray[y:y+h, x:x+w]
        thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
        # Scale the longer side down/up to 32 px, keeping the aspect ratio.
        (th, tw) = thresh.shape
        if tw > th:
            thresh = imutils.resize(thresh, width=32)
        elif th > tw:
            thresh = imutils.resize(thresh, height=32)
        # Pad the shorter side with black so the crop is (roughly) square; the
        # final resize absorbs the off-by-one of odd paddings and the case
        # tw == th, where no scaling happened above.
        (th, tw) = thresh.shape
        dx = int(max(0, 32 - tw) / 2.0)
        dy = int(max(0, 32 - th) / 2.0)
        padded = cv2.copyMakeBorder(thresh, top=dy, bottom=dy, left=dx, right=dx,
                                    borderType=cv2.BORDER_CONSTANT, value=(0, 0, 0))
        padded = cv2.resize(padded, (32, 32))
        # Scale to [0, 1] and add batch and channel axes -> shape (1, 32, 32, 1).
        padded = padded / 255.
        padded = np.expand_dims(padded, axis=0)
        padded = np.expand_dims(padded, axis=-1)
        pred = np.argmax(model.predict(padded), axis=1)
        label = labels[pred[0]]
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 2)
        cv2.putText(img, label, (x-5, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

    figure = plt.figure(figsize=(10, 10))
    # OpenCV images are BGR; matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.axis('off')
    plt.show()
In [21]:
test_pipeline('data/test.jpg')
Pipeline for Expression Solving¶
For example, suppose the expression to be solved is 22+16x16. As the current model doesn't recognize brackets, this expression is interpreted as 22+(16x16), and the same convention is used throughout the pipeline.
In [22]:
def test_pipeline_equation(image_path):
    """Recognise a handwritten arithmetic expression in an image and evaluate it.

    The image is resized to 800x800, edges are extracted with Canny, and every
    external contour whose bounding box is at least 20 px wide and 30 px high is
    cropped, binarised, fitted into a 32x32 frame and classified with the
    notebook-level `model`, going left to right. The annotated image is
    displayed, the predicted symbols are joined into an expression string
    ('add' -> '+', 'sub' -> '-', 'mul' -> '*', 'div' -> '/'), and the value of
    that expression is printed. If nothing was recognised or the recognised
    string is not a valid expression, a message is printed instead.

    Args:
        image_path: path of the image file to analyse.

    Raises:
        FileNotFoundError: if OpenCV cannot read an image from `image_path`.
    """
    chars = []
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None; fail here with a clear
        # message instead of an opaque OpenCV error in cv2.resize.
        raise FileNotFoundError('Could not read an image from {!r}'.format(image_path))
    img = cv2.resize(img, (800, 800))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edged = cv2.Canny(img_gray, 30, 150)
    contours = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    if len(contours) > 0:
        # sort_contours unpacks a zip over the contours and raises ValueError
        # when there are none, so only sort a non-empty result.
        contours = sort_contours(contours, method="left-to-right")[0]
    # Index -> label name. Presumably the sorted class order produced by the
    # LabelEncoder during training - verify against label_encoder.classes_.
    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'add', 'div', 'mul', 'sub']

    for c in contours:
        (x, y, w, h) = cv2.boundingRect(c)
        if w < 20 or h < 30:
            # Too small to be treated as a symbol.
            continue
        roi = img_gray[y:y+h, x:x+w]
        thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
        # Scale the longer side down/up to 32 px, keeping the aspect ratio.
        (th, tw) = thresh.shape
        if tw > th:
            thresh = imutils.resize(thresh, width=32)
        elif th > tw:
            thresh = imutils.resize(thresh, height=32)
        # Pad the shorter side with black so the crop is (roughly) square; the
        # final resize absorbs the off-by-one of odd paddings and the case
        # tw == th, where no scaling happened above.
        (th, tw) = thresh.shape
        dx = int(max(0, 32 - tw) / 2.0)
        dy = int(max(0, 32 - th) / 2.0)
        padded = cv2.copyMakeBorder(thresh, top=dy, bottom=dy, left=dx, right=dx,
                                    borderType=cv2.BORDER_CONSTANT, value=(0, 0, 0))
        padded = cv2.resize(padded, (32, 32))
        # Scale to [0, 1] and add batch and channel axes -> shape (1, 32, 32, 1).
        padded = padded / 255.
        padded = np.expand_dims(padded, axis=0)
        padded = np.expand_dims(padded, axis=-1)
        pred = np.argmax(model.predict(padded), axis=1)
        label = labels[pred[0]]
        chars.append(label)
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 2)
        cv2.putText(img, label, (x-5, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))

    figure = plt.figure(figsize=(10, 10))
    # OpenCV images are BGR; matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.axis('off')
    plt.show()

    # Translate operator label names to Python operators; digits pass through.
    operators = {'add': '+', 'sub': '-', 'mul': '*', 'div': '/'}
    e = ''.join(operators.get(ch, ch) for ch in chars)
    if not e:
        print('No symbols were recognised, so there is no expression to evaluate')
        return
    try:
        # SECURITY NOTE: eval is used on a string assembled exclusively from
        # the entries of `labels` (digits and + - * /). Do not reuse this
        # pattern on text that does not come from that fixed vocabulary.
        v = eval(e)
    except (SyntaxError, ZeroDivisionError) as err:
        # Misrecognised symbols can yield strings such as '3+*4', '07' or '5/0'.
        print('Could not evaluate the expression {} : {}'.format(e, err))
        return
    print('Value of the expression {} : {}'.format(e, v))
In [23]:
test_pipeline_equation('data/test_equation4.jpg')