Handwritten Optical Character Recognition Calculator¶
Importing Necessary Libraries¶
import numpy as np
import cv2
import os
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Activation, MaxPool2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import seaborn as sn
import matplotlib.pyplot as plt
import pandas as pd
import imutils
from imutils.contours import sort_contours
# Notebook shell escapes: fetch the dataset archive, then unpack it in the
# working directory (-q: quiet, -o: overwrite existing files without asking).
!wget https://cainvas-static.s3.amazonaws.com/media/user_data/Yuvnish17/data.zip
!unzip -qo data.zip
Loading the Dataset¶
# Load every image under data/dataset/<label>/ into memory.
# x collects the images as returned by cv2.imread; y collects the matching
# label, which is the name of the sub-folder the image was found in.
x = []
y = []
datadir = 'data/dataset'
for folder in os.listdir(datadir):
    path = os.path.join(datadir, folder)
    if not os.path.isdir(path):
        # Skip stray files sitting next to the class folders.
        continue
    for images in os.listdir(path):
        img = cv2.imread(os.path.join(path, images))
        if img is None:
            # cv2.imread returns None for unreadable / non-image files.
            continue
        # Store the sample and its label; without these appends x and y
        # stay empty and every later cell has nothing to work on.
        x.append(img)
        y.append(folder)
print(f'labels : {list(set(y))}')
Visualizing Images in the Dataset¶
# Show one example image per label on a 5x5 grid of subplots.
figure = plt.figure(figsize=(10, 10))
j = 0
# sorted() gives a stable panel order; iterating a bare set does not.
for i in sorted(set(y)):
    idx = y.index(i)  # first sample carrying this label
    img = x[idx]
    img = cv2.resize(img, (256, 256))
    figure.add_subplot(5, 5, j+1)
    # Actually draw the sample; add_subplot alone leaves the panel empty.
    # NOTE(review): assumes x holds BGR images as produced by cv2.imread,
    # hence the conversion to matplotlib's RGB order - confirm.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title(i)
    plt.axis('off')
    j += 1
Data Distribution of the Dataset¶
# Bar chart of how many samples each label has.
unique, count = np.unique(y, return_counts=True)
figure = plt.figure(figsize=(20, 10))
# Pass the vectors by keyword: current seaborn releases accept only `data`
# positionally, so barplot(unique, count) raises a TypeError there.
sn.barplot(x=unique, y=count).set_title('Number of Images per Category')
As can be seen, the dataset is not heavily imbalanced, so balancing is not required here.
Preprocessing the Data¶
# Turn every loaded image into a 32x32 binarised array, encode the labels as
# integers and split the result into train and test sets.
X = []
for img in x:
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Inverse binary threshold with Otsu's automatically chosen cut-off.
    threshold_image = cv2.threshold(img_gray, 0, 255, cv2.THRESH_BINARY_INV|cv2.THRESH_OTSU)[1]
    threshold_image = cv2.resize(threshold_image, (32, 32))
    # Keep the processed sample; without this X stays empty and
    # train_test_split below has nothing to split.
    X.append(threshold_image)
# Map the label values in y to integer class ids.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
# Hold out 20% of the samples as the test set.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2)
Data Distribution in Train and Test Set¶
# Per-class sample counts for the train split and the test split,
# each drawn on its own figure.
unique_train, count_train = np.unique(Y_train, return_counts=True)
figure = plt.figure(figsize=(20, 10))
# Keyword arguments are required: current seaborn releases reject data
# vectors passed positionally to barplot.
sn.barplot(x=unique_train, y=count_train).set_title('Number of Images per category in Train Set')
unique_test, count_test = np.unique(Y_test, return_counts=True)
figure = plt.figure(figsize=(20, 10))
sn.barplot(x=unique_test, y=count_test).set_title('Number of Images per category in Test Set')
Defining the Model¶
# Labels: convert to arrays, then one-hot encode.
Y_train = to_categorical(np.array(Y_train))
Y_test = to_categorical(np.array(Y_test))
# Images: convert to arrays, append a trailing axis and divide by 255.
X_train = np.expand_dims(np.array(X_train), axis=-1) / 255.
X_test = np.expand_dims(np.array(X_test), axis=-1) / 255.
def math_symbol_and_digits_recognition(input_shape=(32, 32, 1)):
    """Build and compile the CNN used to classify digits and math symbols.

    The network is three Conv2D -> ReLU -> 2x2 max-pool stages (32, 32 and 64
    filters) followed by dense layers of 120 and 84 units and a 14-way
    softmax output.

    Args:
        input_shape: Shape of one input sample, without the batch axis.

    Returns:
        A Sequential model compiled with Adam, categorical cross-entropy
        loss and the accuracy metric.
    """
    regularizer = l2(0.01)
    model = Sequential()
    # Declare the input explicitly so the `input_shape` argument is actually
    # honoured and the model is built as soon as it is created.
    model.add(Input(shape=input_shape))
    model.add(Conv2D(32, (3, 3), strides=(1, 1), padding='same',
                     name='conv1', activity_regularizer=regularizer))
    model.add(Activation(activation='relu', name='act1'))
    model.add(MaxPool2D((2, 2), strides=(2, 2)))
    model.add(Conv2D(32, (3, 3), strides=(1, 1), padding='same',
                     name='conv2', activity_regularizer=regularizer))
    model.add(Activation(activation='relu', name='act2'))
    model.add(MaxPool2D((2, 2), strides=(2, 2)))
    model.add(Conv2D(64, (3, 3), strides=(1, 1), padding='same',
                     name='conv3', activity_regularizer=regularizer))
    model.add(Activation(activation='relu', name='act3'))
    model.add(MaxPool2D((2, 2), strides=(2, 2)))
    # Collapse the feature map to a vector. Without this the dense layers act
    # on the last axis only and the output stays 4-D, which cannot be matched
    # against one-hot labels of shape (batch, 14).
    model.add(Flatten())
    model.add(Dense(120, activation='relu', kernel_initializer=glorot_uniform(seed=0), name='fc1'))
    model.add(Dense(84, activation='relu', kernel_initializer=glorot_uniform(seed=0), name='fc2'))
    model.add(Dense(14, activation='softmax', kernel_initializer=glorot_uniform(seed=0), name='fc3'))
    optimizer = Adam()
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model


model = math_symbol_and_digits_recognition(input_shape=(32, 32, 1))
Training the Model¶
def step_decay(epoch):
    """Return the learning rate for `epoch` under a step-decay schedule.

    The rate starts at 0.001 and is multiplied by 0.5 once for every whole
    multiple of 10 contained in ``epoch + 1``.
    """
    base_rate, period, decay = 0.001, 10, 0.5
    completed_drops = np.floor((epoch + 1) / period)
    return float(base_rate * decay ** completed_drops)
# Save the model to disk whenever the validation loss reaches a new minimum.
checkpoint = ModelCheckpoint('maths_symbol_and_digits_recognition.h5',
                             monitor='val_loss', save_best_only=True,
                             verbose=1, mode='min')
callbacks = [checkpoint, LearningRateScheduler(step_decay)]
# Light augmentation: random zoom of up to 10%.
aug = ImageDataGenerator(zoom_range=0.1,
                         # rotation_range=5,
                         )
# The generator returned by aug.flow already yields batches of 128, so
# batch_size is not repeated in fit() (Keras documents that it must not be
# given for generator inputs). The callbacks list built above is passed in so
# that checkpointing and the step-decay schedule actually take effect.
hist = model.fit(aug.flow(X_train, Y_train, batch_size=128), epochs=100,
                 validation_data=(X_test, Y_test), callbacks=callbacks)
# hist2 = model2.fit(X_train, Y_train, batch_size=128, epochs=100, validation_data=(X_test, Y_test), callbacks=callbacks)
Loss and Accuracy Plot¶
# Accuracy per epoch for the training data and the validation (test) data.
figure = plt.figure(figsize=(10, 10))
for key, caption in (('accuracy', 'Train Set Accuracy'),
                     ('val_accuracy', 'Test Set Accuracy')):
    plt.plot(hist.history[key], label=caption)
plt.title('Accuracy Plot')
plt.legend(loc='upper right')

# Loss per epoch for the same two sets, on a second figure.
figure2 = plt.figure(figsize=(10, 10))
for key, caption in (('loss', 'Train Set Loss'),
                     ('val_loss', 'Test Set Loss')):
    plt.plot(hist.history[key], label=caption)
plt.title('Loss Plot')
plt.ylabel('Loss Value')
plt.legend(loc='upper right')
Classification Report¶
# Predicted class id per test sample: index of the largest model output.
ypred = np.argmax(model.predict(X_test), axis=1)
# True class id per test sample, recovered from the one-hot rows of Y_test.
Y_test_hat = np.argmax(Y_test, axis=1)
print(classification_report(Y_test_hat, ypred))
Confusion Matrix¶
# Confusion matrix of true vs. predicted class ids, drawn as an annotated
# heatmap whose rows and columns are labelled 0..13.
matrix = confusion_matrix(Y_test_hat, ypred)
df_cm = pd.DataFrame(matrix, index=list(range(14)), columns=list(range(14)))
figure = plt.figure(figsize=(20, 10))
sn.heatmap(df_cm, annot=True, fmt='d')
Saving the Model¶
Testing the Model¶
def test_pipeline(image_path):
    """Detect, classify and annotate the symbols found in an image.

    The image is resized to 800x800, contours are extracted from its Canny
    edge map and visited left to right. Every contour whose bounding box is
    at least 20 px wide and 30 px high is thresholded, fitted into a 32x32
    patch and classified with the global `model`. The predicted label and the
    bounding box are drawn on the image, which is then shown with matplotlib.

    Args:
        image_path: Path of the image file to analyse.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    img = cv2.resize(img, (800, 800))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # blurred = cv2.GaussianBlur(img_gray, (3, 3), 0)
    edged = cv2.Canny(img_gray, 30, 150)
    contours = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    if len(contours) > 0:
        # sort_contours cannot unpack an empty contour list, so only sort
        # when something was found.
        contours = sort_contours(contours, method="left-to-right")[0]
    # Position in this list = class id predicted by the model.
    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'add', 'div', 'mul', 'sub']
    for c in contours:
        (x, y, w, h) = cv2.boundingRect(c)
        if w < 20 or h < 30:
            # Too small to be a symbol; skip it.
            continue
        roi = img_gray[y:y+h, x:x+w]
        thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
        (th, tw) = thresh.shape
        # Scale the longer side to 32 px, keeping the aspect ratio.
        if tw > th:
            thresh = imutils.resize(thresh, width=32)
        elif th > tw:
            thresh = imutils.resize(thresh, height=32)
        (th, tw) = thresh.shape
        # Pad the shorter side with black so the patch is roughly square,
        # then force the exact 32x32 size the model was trained on.
        dx = int(max(0, 32 - tw) / 2.0)
        dy = int(max(0, 32 - th) / 2.0)
        padded = cv2.copyMakeBorder(thresh, top=dy, bottom=dy, left=dx, right=dx,
                                    borderType=cv2.BORDER_CONSTANT, value=(0, 0, 0))
        padded = cv2.resize(padded, (32, 32))
        padded = np.array(padded)
        padded = padded/255.
        # Add the batch axis in front and the channel axis at the end.
        padded = np.expand_dims(padded, axis=0)
        padded = np.expand_dims(padded, axis=-1)
        pred = model.predict(padded)
        pred = np.argmax(pred, axis=1)
        label = labels[pred[0]]
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 2)
        cv2.putText(img, label, (x-5, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
    figure = plt.figure(figsize=(10, 10))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Draw the annotated image; the figure is otherwise left empty.
    plt.imshow(img)
Pipeline for Expression Solving¶
For example, suppose the expression to be solved is 22+16x16. Since the current model does not recognize brackets, this expression is interpreted as 22+(16x16), and the same convention is used throughout the pipeline.
def test_pipeline_equation(image_path):
    """Recognise a handwritten arithmetic expression and print its value.

    The image is resized to 800x800, contours are extracted from its Canny
    edge map and visited left to right. Every contour whose bounding box is
    at least 20 px wide and 30 px high is thresholded, fitted into a 32x32
    patch and classified with the global `model`. The predicted labels are
    joined into an expression string ('add', 'sub', 'mul' and 'div' become
    '+', '-', '*' and '/'), which is evaluated and printed. The annotated
    image is shown with matplotlib.

    Args:
        image_path: Path of the image file to analyse.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If no symbol is recognised in the image.
    """
    chars = []
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    img = cv2.resize(img, (800, 800))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # blurred = cv2.GaussianBlur(img_gray, (3, 3), 0)
    edged = cv2.Canny(img_gray, 30, 150)
    contours = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    if len(contours) > 0:
        # sort_contours cannot unpack an empty contour list, so only sort
        # when something was found.
        contours = sort_contours(contours, method="left-to-right")[0]
    # Position in this list = class id predicted by the model.
    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'add', 'div', 'mul', 'sub']
    for c in contours:
        (x, y, w, h) = cv2.boundingRect(c)
        if w < 20 or h < 30:
            # Too small to be a symbol; skip it.
            continue
        roi = img_gray[y:y+h, x:x+w]
        thresh = cv2.threshold(roi, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
        (th, tw) = thresh.shape
        # Scale the longer side to 32 px, keeping the aspect ratio.
        if tw > th:
            thresh = imutils.resize(thresh, width=32)
        elif th > tw:
            thresh = imutils.resize(thresh, height=32)
        (th, tw) = thresh.shape
        # Pad the shorter side with black so the patch is roughly square,
        # then force the exact 32x32 size the model was trained on.
        dx = int(max(0, 32 - tw) / 2.0)
        dy = int(max(0, 32 - th) / 2.0)
        padded = cv2.copyMakeBorder(thresh, top=dy, bottom=dy, left=dx, right=dx,
                                    borderType=cv2.BORDER_CONSTANT, value=(0, 0, 0))
        padded = cv2.resize(padded, (32, 32))
        padded = np.array(padded)
        padded = padded/255.
        # Add the batch axis in front and the channel axis at the end.
        padded = np.expand_dims(padded, axis=0)
        padded = np.expand_dims(padded, axis=-1)
        pred = model.predict(padded)
        pred = np.argmax(pred, axis=1)
        # print(pred)
        label = labels[pred[0]]
        # Remember the symbol, in reading order, for the expression below.
        chars.append(label)
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 2)
        cv2.putText(img, label, (x-5, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
    figure = plt.figure(figsize=(10, 10))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Draw the annotated image; the figure is otherwise left empty.
    plt.imshow(img)
    if not chars:
        raise ValueError('no symbols were recognised in the image')
    # Operator labels map to their Python operator; digit labels pass through
    # unchanged (each symbol contributes exactly one token).
    operators = {'add': '+', 'sub': '-', 'mul': '*', 'div': '/'}
    e = ''.join(operators.get(i, i) for i in chars)
    # NOTE: eval only ever sees text assembled from the fixed `labels`
    # vocabulary above (digits and + - * /), never raw user input. It still
    # raises for malformed expressions such as a trailing operator or a
    # division by zero.
    v = eval(e)
    print('Value of the expression {} : {}'.format(e, v))
In [23]: