
Identifying Arabic Digits

Credit: AITS Cainvas Community

Photo by Merso Design on Dribbble

How would you describe the Arabic language? Arabic is a Central Semitic language, closely related to Aramaic and Hebrew. Standard or Classical Arabic – Fusha – is the distinct form of the language used in media, newspapers, literature, and other formal settings, and it is often counted among the most beautiful languages.

In [1]:
# Import all the necessary libraries

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

Download and Unzip the Dataset

In [2]:
!wget 'https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/arabic.zip'

!unzip -qo arabic.zip 
!rm arabic.zip
--2021-12-08 05:41:47--  https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/arabic.zip
Resolving cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)... 52.219.66.20
Connecting to cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)|52.219.66.20|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 25159080 (24M) [application/x-zip-compressed]
Saving to: ‘arabic.zip’

arabic.zip          100%[===================>]  23.99M   107MB/s    in 0.2s    

2021-12-08 05:41:47 (107 MB/s) - ‘arabic.zip’ saved [25159080/25159080]

Load the Training and Testing Data

In [3]:
# Note: these CSVs contain raw pixel rows and no header line, so pandas
# consumes the first image as column names; hence the 13439/3359 shapes
# printed below instead of 13440/3360 (see the sketch after this cell).
train=pd.read_csv('csvTrainImages 13440x1024.csv')
test=pd.read_csv('csvTestImages 3360x1024.csv')
train_label=pd.read_csv('csvTrainLabel 13440x1.csv')
test_label=pd.read_csv('csvTestLabel 3360x1.csv')
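
A safer load (a minimal sketch, not the cell actually run here) would pass header=None so that all 13440/3360 samples survive:

# Hypothetical corrected load: header=None stops pandas from
# consuming the first image row as column names.
train = pd.read_csv('csvTrainImages 13440x1024.csv', header=None)
test = pd.read_csv('csvTestImages 3360x1024.csv', header=None)
train_label = pd.read_csv('csvTrainLabel 13440x1.csv', header=None)
test_label = pd.read_csv('csvTestLabel 3360x1.csv', header=None)
# train.shape would then be (13440, 1024) rather than (13439, 1024)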
In [4]:
train.head()
Out[4]:
0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 ... 0.896 0.897 0.898 0.899 0.900 0.901 0.902 0.903 0.904 0.905
0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 1024 columns

Display the Shapes of the Training and Testing Data

In [5]:
print("Training Images Shape = ", train.shape)
print("Testing Images Shape = ",test.shape)
print("Training Labels Shape = ",train_label.shape)
print("Testing Labels Shape = ",test_label.shape)
Training Images Shape =  (13439, 1024)
Testing Images Shape =  (3359, 1024)
Training Labels Shape =  (13439, 1)
Testing Labels Shape =  (3359, 1)
In [6]:
labels = train_label.iloc[:,0].unique()
print(labels)

# Labels range from 1 to 28 (28 classes)
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28]
In [7]:
train=np.array(train)
test=np.array(test)
train_label=np.array(train_label)
test_label=np.array(test_label)

X = train
y0 = train_label

Encoding the Labels Using LabelBinarizer

In [8]:
binencoder = LabelBinarizer()
y = binencoder.fit_transform(y0)
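
For intuition, LabelBinarizer one-hot encodes each integer class as a row vector; a minimal sketch on a toy subset of the 1-28 label range:

# Toy example (illustrative only): four labels -> four one-hot columns,
# one per distinct class seen by fit_transform.
toy = LabelBinarizer()
onehot = toy.fit_transform([1, 3, 2, 28])
print(toy.classes_)   # [ 1  2  3 28]
print(onehot[0])      # [1 0 0 0] -> class 1
# On the full label set, y has shape (n_samples, 28).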
In [9]:
X_images = X.reshape(-1,32,32)
test_images = test.reshape(-1,32,32)

print(X_images.shape)
print(test_images.shape)
(13439, 32, 32)
(3359, 32, 32)
In [10]:
# Split the data into training and validation sets (80/20)
X_train, X_test, y_train, y_test = train_test_split(X_images, y, test_size = 0.2, random_state=90)
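
An optional refinement (an assumption, not used in the original split): stratifying keeps all 28 classes equally represented on both sides of the split.

# Hypothetical stratified variant of the split above.
X_train, X_test, y_train, y_test = train_test_split(
    X_images, y, test_size = 0.2, random_state = 90,
    stratify = y.argmax(axis = 1))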

Function to Visualize Some Images

In [11]:
def visualize_images(df, img_size, number_of_images, name):
    f = plt.figure(figsize=(15,15)) # defining a figure
    reshaped_df = df.reshape(df.shape[0], img_size, img_size)
    # integer row count: passing a float here triggers matplotlib's
    # three-element position deprecation warning
    number_of_rows = number_of_images//5 if number_of_images%5 == 0 else number_of_images//5 + 1
    for i in range(number_of_images):        
        f.add_subplot(number_of_rows, 5, i+1, xticks=[], yticks=[])
        plt.title(np.argmax(name[i]), color = 'blue', fontdict = {'size' : '25'})
        plt.imshow(reshaped_df[i], cmap='gray')
In [12]:
visualize_images(X_train, 32, 20, y_train)

Visualize the Pixels of One Image

In [13]:
def visualize_input(img, ax):
    img = img.reshape(32, 32)
    ax.imshow(img, cmap='gray')
    width, height = img.shape
    thresh = img.max()/2.5
    for x in range(width):
        for y in range(height):
            ax.annotate(str(round(img[x][y],2)), xy=(y,x),
                        horizontalalignment='center',
                        verticalalignment='center',
                        color='white' if img[x][y]<thresh else 'black')

fig = plt.figure(figsize = (15,15)) 
ax = fig.add_subplot(111, xticks=[], yticks=[])


visualize_input(X_train[0], ax)
In [14]:
# Scale pixel values to [0, 1] and add a channel dimension for the CNN
X_train = X_train/255
X_test = X_test/255

X_train = X_train.reshape(-1,32,32,1).astype('float32')
X_test = X_test.reshape(-1,32,32,1).astype('float32')
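
A quick sanity check (an optional addition) confirms the tensors now have the range and shape the CNN expects:

# Pixel values should lie in [0, 1] and each image be 32x32x1 float32.
print(X_train.shape, X_train.dtype)   # expected: (10751, 32, 32, 1) float32
print(X_train.min(), X_train.max())   # expected: 0.0 1.0, assuming raw 0-255 pixels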

Model Architecture

In [15]:
# Defining an EarlyStopping callback to monitor validation loss
es = EarlyStopping(monitor='val_loss', patience=5)
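
A common refinement (an assumption, not what the run below used) is restore_best_weights, which rolls the model back to the epoch with the lowest validation loss once training stops:

# Hypothetical variant: keep the best weights, not the last-epoch weights.
es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)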
In [16]:
# Defining Model Architecture
model = Sequential()

model.add(Conv2D(32,(3,3),input_shape=(32,32,1),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(32,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(32,(3,3),activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Dropout(0.2))
model.add(Flatten())

model.add(Dense(36,activation='relu'))
model.add(Dense(36, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(28, activation='sigmoid')) # note: softmax is the conventional multi-class output; sigmoid still trains here, as the run below shows
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 30, 30, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 32)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 4, 4, 32)          9248      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 2, 2, 32)          0         
_________________________________________________________________
dropout (Dropout)            (None, 2, 2, 32)          0         
_________________________________________________________________
flatten (Flatten)            (None, 128)               0         
_________________________________________________________________
dense (Dense)                (None, 36)                4644      
_________________________________________________________________
dense_1 (Dense)              (None, 36)                1332      
_________________________________________________________________
dropout_1 (Dropout)          (None, 36)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 28)                1036      
=================================================================
Total params: 25,828
Trainable params: 25,828
Non-trainable params: 0
_________________________________________________________________
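
The parameter counts in the summary can be verified by hand: a Conv2D layer has (kernel_h * kernel_w * in_channels + 1) * filters weights, and a Dense layer has (inputs + 1) * units.

# Parameter arithmetic behind model.summary():
print((3*3*1  + 1) * 32)   # conv2d:   320
print((3*3*32 + 1) * 32)   # conv2d_1: 9248
print((3*3*32 + 1) * 32)   # conv2d_2: 9248
print((128 + 1) * 36)      # dense:    4644 (2*2*32 = 128 flattened inputs)
print((36 + 1) * 36)       # dense_1:  1332
print((36 + 1) * 28)       # dense_2:  1036 -> total 25,828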
In [17]:
# Compile the model
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

Model Training

In [18]:
# Train the model with a batch size of 50 for up to 100 epochs;
# early stopping may end training sooner (here it stops at epoch 62)
history = model.fit(X_train, 
                    y_train, 
                    validation_data = (X_test, y_test),
                    batch_size = 50,
                    epochs = 100,
                    callbacks = [es]
                   )
Epoch 1/100
216/216 [==============================] - 11s 53ms/step - loss: 2.9256 - accuracy: 0.1175 - val_loss: 2.1581 - val_accuracy: 0.3475
Epoch 2/100
216/216 [==============================] - 11s 52ms/step - loss: 2.0657 - accuracy: 0.3192 - val_loss: 1.6028 - val_accuracy: 0.4792
Epoch 3/100
216/216 [==============================] - 11s 52ms/step - loss: 1.6527 - accuracy: 0.4214 - val_loss: 1.2024 - val_accuracy: 0.5919
Epoch 4/100
216/216 [==============================] - 11s 52ms/step - loss: 1.3530 - accuracy: 0.5200 - val_loss: 0.9954 - val_accuracy: 0.6905
Epoch 5/100
216/216 [==============================] - 11s 52ms/step - loss: 1.1628 - accuracy: 0.5853 - val_loss: 0.8280 - val_accuracy: 0.7374
Epoch 6/100
216/216 [==============================] - 11s 52ms/step - loss: 1.0510 - accuracy: 0.6207 - val_loss: 0.7219 - val_accuracy: 0.7541
Epoch 7/100
216/216 [==============================] - 11s 52ms/step - loss: 0.9422 - accuracy: 0.6631 - val_loss: 0.6673 - val_accuracy: 0.7794
Epoch 8/100
216/216 [==============================] - 11s 51ms/step - loss: 0.8701 - accuracy: 0.6864 - val_loss: 0.7115 - val_accuracy: 0.7608
Epoch 9/100
216/216 [==============================] - 11s 52ms/step - loss: 0.8038 - accuracy: 0.7166 - val_loss: 0.5937 - val_accuracy: 0.7894
Epoch 10/100
216/216 [==============================] - 11s 52ms/step - loss: 0.7555 - accuracy: 0.7339 - val_loss: 0.6356 - val_accuracy: 0.7708
Epoch 11/100
216/216 [==============================] - 11s 51ms/step - loss: 0.8065 - accuracy: 0.7193 - val_loss: 0.5260 - val_accuracy: 0.8151
Epoch 12/100
216/216 [==============================] - 11s 51ms/step - loss: 0.6862 - accuracy: 0.7615 - val_loss: 0.4934 - val_accuracy: 0.8285
Epoch 13/100
216/216 [==============================] - 11s 51ms/step - loss: 0.6367 - accuracy: 0.7742 - val_loss: 0.5219 - val_accuracy: 0.8114
Epoch 14/100
216/216 [==============================] - 11s 51ms/step - loss: 0.6718 - accuracy: 0.7722 - val_loss: 0.4690 - val_accuracy: 0.8404
Epoch 15/100
216/216 [==============================] - 11s 51ms/step - loss: 0.5982 - accuracy: 0.7959 - val_loss: 0.4412 - val_accuracy: 0.8486
Epoch 16/100
216/216 [==============================] - 11s 53ms/step - loss: 0.5598 - accuracy: 0.8041 - val_loss: 0.4430 - val_accuracy: 0.8441
Epoch 17/100
216/216 [==============================] - 11s 52ms/step - loss: 0.5377 - accuracy: 0.8178 - val_loss: 0.4195 - val_accuracy: 0.8553
Epoch 18/100
216/216 [==============================] - 11s 53ms/step - loss: 0.5135 - accuracy: 0.8220 - val_loss: 0.3864 - val_accuracy: 0.8638
Epoch 19/100
216/216 [==============================] - 11s 52ms/step - loss: 0.5474 - accuracy: 0.8114 - val_loss: 0.3757 - val_accuracy: 0.8743
Epoch 20/100
216/216 [==============================] - 11s 51ms/step - loss: 0.4976 - accuracy: 0.8293 - val_loss: 0.3853 - val_accuracy: 0.8676
Epoch 21/100
216/216 [==============================] - 11s 52ms/step - loss: 0.4586 - accuracy: 0.8442 - val_loss: 0.3466 - val_accuracy: 0.8806
Epoch 22/100
216/216 [==============================] - 11s 52ms/step - loss: 0.4392 - accuracy: 0.8511 - val_loss: 0.3551 - val_accuracy: 0.8769
Epoch 23/100
216/216 [==============================] - 11s 51ms/step - loss: 0.4299 - accuracy: 0.8520 - val_loss: 0.3309 - val_accuracy: 0.8921
Epoch 24/100
216/216 [==============================] - 11s 52ms/step - loss: 0.4127 - accuracy: 0.8604 - val_loss: 0.3203 - val_accuracy: 0.8880
Epoch 25/100
216/216 [==============================] - 11s 52ms/step - loss: 0.3896 - accuracy: 0.8650 - val_loss: 0.3709 - val_accuracy: 0.8668
Epoch 26/100
216/216 [==============================] - 11s 52ms/step - loss: 0.4149 - accuracy: 0.8579 - val_loss: 0.3213 - val_accuracy: 0.8880
Epoch 27/100
216/216 [==============================] - 11s 51ms/step - loss: 0.3678 - accuracy: 0.8722 - val_loss: 0.2961 - val_accuracy: 0.9022
Epoch 28/100
216/216 [==============================] - 11s 52ms/step - loss: 0.3654 - accuracy: 0.8756 - val_loss: 0.3126 - val_accuracy: 0.8962
Epoch 29/100
216/216 [==============================] - 11s 52ms/step - loss: 0.3569 - accuracy: 0.8793 - val_loss: 0.3210 - val_accuracy: 0.8873
Epoch 30/100
216/216 [==============================] - 11s 51ms/step - loss: 0.3476 - accuracy: 0.8808 - val_loss: 0.2902 - val_accuracy: 0.9062
Epoch 31/100
216/216 [==============================] - 11s 51ms/step - loss: 0.3213 - accuracy: 0.8893 - val_loss: 0.2756 - val_accuracy: 0.9100
Epoch 32/100
216/216 [==============================] - 11s 52ms/step - loss: 0.3236 - accuracy: 0.8900 - val_loss: 0.2997 - val_accuracy: 0.9003
Epoch 33/100
216/216 [==============================] - 11s 51ms/step - loss: 0.3087 - accuracy: 0.8951 - val_loss: 0.2744 - val_accuracy: 0.9092
Epoch 34/100
216/216 [==============================] - 11s 51ms/step - loss: 0.2977 - accuracy: 0.8996 - val_loss: 0.2792 - val_accuracy: 0.9085
Epoch 35/100
216/216 [==============================] - 11s 49ms/step - loss: 0.2962 - accuracy: 0.9000 - val_loss: 0.2843 - val_accuracy: 0.9085
Epoch 36/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2864 - accuracy: 0.9048 - val_loss: 0.2663 - val_accuracy: 0.9111
Epoch 37/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2844 - accuracy: 0.9035 - val_loss: 0.2926 - val_accuracy: 0.9096
Epoch 38/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2797 - accuracy: 0.9061 - val_loss: 0.2561 - val_accuracy: 0.9215
Epoch 39/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2687 - accuracy: 0.9100 - val_loss: 0.2646 - val_accuracy: 0.9159
Epoch 40/100
216/216 [==============================] - 10s 49ms/step - loss: 0.2780 - accuracy: 0.9061 - val_loss: 0.2812 - val_accuracy: 0.9144
Epoch 41/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2594 - accuracy: 0.9116 - val_loss: 0.2968 - val_accuracy: 0.9044
Epoch 42/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2643 - accuracy: 0.9096 - val_loss: 0.2725 - val_accuracy: 0.9185
Epoch 43/100
216/216 [==============================] - 11s 49ms/step - loss: 0.2527 - accuracy: 0.9146 - val_loss: 0.2525 - val_accuracy: 0.9245
Epoch 44/100
216/216 [==============================] - 10s 49ms/step - loss: 0.2521 - accuracy: 0.9155 - val_loss: 0.2448 - val_accuracy: 0.9263
Epoch 45/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2391 - accuracy: 0.9165 - val_loss: 0.2475 - val_accuracy: 0.9263
Epoch 46/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2424 - accuracy: 0.9209 - val_loss: 0.2489 - val_accuracy: 0.9219
Epoch 47/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2267 - accuracy: 0.9240 - val_loss: 0.2714 - val_accuracy: 0.9167
Epoch 48/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2330 - accuracy: 0.9209 - val_loss: 0.2429 - val_accuracy: 0.9301
Epoch 49/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2170 - accuracy: 0.9289 - val_loss: 0.2622 - val_accuracy: 0.9245
Epoch 50/100
216/216 [==============================] - 10s 49ms/step - loss: 0.2242 - accuracy: 0.9242 - val_loss: 0.2570 - val_accuracy: 0.9278
Epoch 51/100
216/216 [==============================] - 10s 49ms/step - loss: 0.2217 - accuracy: 0.9273 - val_loss: 0.2425 - val_accuracy: 0.9315
Epoch 52/100
216/216 [==============================] - 11s 49ms/step - loss: 0.1976 - accuracy: 0.9309 - val_loss: 0.2328 - val_accuracy: 0.9342
Epoch 53/100
216/216 [==============================] - 10s 48ms/step - loss: 0.2013 - accuracy: 0.9292 - val_loss: 0.2901 - val_accuracy: 0.9111
Epoch 54/100
216/216 [==============================] - 10s 48ms/step - loss: 0.3031 - accuracy: 0.9008 - val_loss: 0.2425 - val_accuracy: 0.9289
Epoch 55/100
216/216 [==============================] - 10s 49ms/step - loss: 0.2134 - accuracy: 0.9302 - val_loss: 0.2367 - val_accuracy: 0.9345
Epoch 56/100
216/216 [==============================] - 10s 48ms/step - loss: 0.1893 - accuracy: 0.9386 - val_loss: 0.2310 - val_accuracy: 0.9319
Epoch 57/100
216/216 [==============================] - 10s 48ms/step - loss: 0.1933 - accuracy: 0.9353 - val_loss: 0.2231 - val_accuracy: 0.9349
Epoch 58/100
216/216 [==============================] - 10s 48ms/step - loss: 0.1813 - accuracy: 0.9357 - val_loss: 0.2295 - val_accuracy: 0.9338
Epoch 59/100
216/216 [==============================] - 10s 48ms/step - loss: 0.1887 - accuracy: 0.9379 - val_loss: 0.2235 - val_accuracy: 0.9342
Epoch 60/100
216/216 [==============================] - 10s 48ms/step - loss: 0.1826 - accuracy: 0.9397 - val_loss: 0.2414 - val_accuracy: 0.9338
Epoch 61/100
216/216 [==============================] - 10s 48ms/step - loss: 0.1841 - accuracy: 0.9382 - val_loss: 0.2235 - val_accuracy: 0.9353
Epoch 62/100
216/216 [==============================] - 10s 48ms/step - loss: 0.1671 - accuracy: 0.9413 - val_loss: 0.2344 - val_accuracy: 0.9330
In [19]:
# Function to plot "accuracy vs epoch" graphs and "loss vs epoch" graphs for training and validation data
def plot_metrics(model_name, metric = 'accuracy'):
    if metric == 'loss':
        plt.title("Loss Values")
        plt.plot(model_name.history['loss'], label = 'train')
        plt.plot(model_name.history['val_loss'], label = 'validation')
    else:
        plt.title("Accuracy Values")
        plt.plot(model_name.history['accuracy'], label = 'train')
        plt.plot(model_name.history['val_accuracy'], label = 'validation')
    plt.legend()
    plt.show()
In [20]:
plot_metrics(history, 'accuracy')
plot_metrics(history, 'loss')
In [21]:
# Saving our trained model
from tensorflow.keras.models import save_model
if not os.path.isfile('best_model.h5'):
    model.save('best_model.h5')

Evaluating Model Performance

In [22]:
# Plot a confusion matrix to check the performance of our model
Y_pred = np.argmax(model.predict(X_test), axis = 1)
cnf = confusion_matrix(y_test.argmax(axis = 1), Y_pred)


df_cnf = pd.DataFrame(cnf, range(28), range(28))
sns.set(font_scale = 2)
plt.figure(figsize = (25, 20))
sns.heatmap(df_cnf, annot = True, linewidths = 0.8, fmt = '0.3g', cbar = False)
plt.title("Confusion Matrix")
plt.xlabel("True Values")
plt.ylabel("Prediction Values")
plt.show()
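
Beyond the heatmap, a per-class breakdown makes it easier to spot which of the 28 characters the model confuses; a minimal sketch using sklearn's classification_report:

from sklearn.metrics import classification_report

# Precision, recall and F1 for each of the 28 classes on the validation split.
print(classification_report(y_test.argmax(axis = 1), Y_pred))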

Making Predictions on One Label

In [23]:
pred = np.argmax(model.predict(np.expand_dims(X_test[7], axis = 0))) + 1 # +1 maps the 0-27 index back to the 1-28 label range
preds = "Prediction: " + str(pred)
plt.figure(figsize = (7,7))
actual_label = np.argmax(y_test[7]) + 1
plt.imshow(X_test[7])

plt.grid(False)
plt.axis("off")
plt.title(preds)
plt.suptitle("Actual Label " + str(actual_label))
Out[23]:
Text(0.5, 0.98, 'Actual Label 3')
In [25]:
from tensorflow.keras import models
model = models.load_model('best_model.h5')
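
A quick check (optional) that the reloaded model behaves like the one just trained:

# Loss/accuracy here should match the final epoch logged above.
loss, acc = model.evaluate(X_test, y_test, verbose = 0)
print("val loss: %.4f, val accuracy: %.4f" % (loss, acc))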
In [26]:
!deepCC best_model.h5
[INFO]
Reading [keras model] 'best_model.h5'
[SUCCESS]
Saved 'best_model_deepC/best_model.onnx'
[INFO]
Reading [onnx model] 'best_model_deepC/best_model.onnx'
[INFO]
Model info:
  ir_vesion : 4
  doc       : 
[WARNING]
[ONNX]: terminal (input/output) conv2d_input's shape is less than 1. Changing it to 1.
[WARNING]
[ONNX]: terminal (input/output) dense_2's shape is less than 1. Changing it to 1.
[INFO]
Running DNNC graph sanity check ...
[SUCCESS]
Passed sanity check.
[INFO]
Writing C++ file 'best_model_deepC/best_model.cpp'
[INFO]
deepSea model files are ready in 'best_model_deepC/' 
[RUNNING COMMAND]
g++ -std=c++11 -O3 -fno-rtti -fno-exceptions -I. -I/opt/tljh/user/lib/python3.7/site-packages/deepC-0.13-py3.7-linux-x86_64.egg/deepC/include -isystem /opt/tljh/user/lib/python3.7/site-packages/deepC-0.13-py3.7-linux-x86_64.egg/deepC/packages/eigen-eigen-323c052e1731 "best_model_deepC/best_model.cpp" -D_AITS_MAIN -o "best_model_deepC/best_model.exe"
[RUNNING COMMAND]
size "best_model_deepC/best_model.exe"
   text	   data	    bss	    dec	    hex	filename
 283123	   3960	    760	 287843	  46463	best_model_deepC/best_model.exe
[SUCCESS]
Saved model as executable "best_model_deepC/best_model.exe"
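
Since deepCC also writes an ONNX copy of the network, the exported graph can be sanity-checked off-device; a minimal sketch, assuming onnxruntime is installed:

import numpy as np
import onnxruntime as ort

# Run one validation image through the exported ONNX graph and compare
# the predicted class against the Keras model above.
sess = ort.InferenceSession('best_model_deepC/best_model.onnx')
input_name = sess.get_inputs()[0].name
probs = sess.run(None, {input_name: X_test[:1].astype(np.float32)})[0]
print("Predicted class:", probs.argmax() + 1)   # +1 for the 1-28 label range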