
Identifying Arabic Digits

How do you describe Arabic language? Arabic is a Central Semitic language, closely related to Aramaic and Hebrew. Standard or Classical Arabic – Fusha – is the distinct form of the language used in media, newspapers, literature and other formal settings and its one of the most beautiful languages.

In [1]:
# Import all the necessary libraries

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix

Unzip the Dataset

In [2]:
!wget ''

!unzip -qo 
--2021-12-08 05:41:47--
Resolving (
Connecting to (||:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 25159080 (24M) [application/x-zip-compressed]
Saving to: ‘’          100%[===================>]  23.99M   107MB/s    in 0.2s    

2021-12-08 05:41:47 (107 MB/s) - ‘’ saved [25159080/25159080]

Load Training and Testing Labels

In [3]:
train=pd.read_csv('csvTrainImages 13440x1024.csv')
test=pd.read_csv('csvTestImages 3360x1024.csv')
train_label=pd.read_csv('csvTrainLabel 13440x1.csv')
test_label=pd.read_csv('csvTestLabel 3360x1.csv')
In [4]:
0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 ... 0.896 0.897 0.898 0.899 0.900 0.901 0.902 0.903 0.904 0.905
0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 1024 columns

Display the Shapes of Training and Testing Labels

In [5]:
print("Training Images Shape = ", train.shape)
print("Testing Images Shape = ",test.shape)
print("Training Labels Shape = ",train_label.shape)
print("Testing Labels Shape = ",test_label.shape)
Training Images Shape =  (13439, 1024)
Testing Images Shape =  (3359, 1024)
Training Labels Shape =  (13439, 1)
Testing Labels Shape =  (3359, 1)
In [6]:
labels = train_label.iloc[:,0].unique()

# Digits vary from 1 - 28
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28]
In [7]:

X = train
y0 = train_label

Encoding the labels using Label Binarizer

In [8]:
binencoder = LabelBinarizer()
y = binencoder.fit_transform(y0)
In [9]:
X_images = X.reshape(-1,32,32)
test_images = test.reshape(-1,32,32)

(13439, 32, 32)
(3359, 32, 32)
In [10]:
# Split the dataset into training and testing labels
X_train, X_test, y_train, y_test = train_test_split(X_images, y, test_size = 0.2, random_state=90)

Function to Visualize Some Images

In [11]:
def visualize_images(df, img_size, number_of_images, name):
    n_rows = df.shape[0]
    f = plt.figure(figsize=(15,15)) # defining a figure
    reshaped_df = df.reshape(df.shape[0], img_size, img_size)
    number_of_rows = number_of_images/5 if number_of_images%5 == 0 else (number_of_images/5) +1
    for i in range(number_of_images):        
        f.add_subplot(number_of_rows, 5, i+1, xticks=[], yticks=[])
        #plt.figure(figsize = (7,7))
        plt.title(np.argmax(name[i]), color = 'blue', fontdict = {'size' : '25'})
        plt.imshow(reshaped_df[i], cmap='gray')
In [12]:
visualize_images(X_train, 32, 20, y_train)
<Figure size 576x576 with 0 Axes>

Visualize pixels of one Image

In [13]:
def visualize_input(img, ax):
    img = img.reshape(32, 32)
    ax.imshow(img, cmap='gray')
    width, height = img.shape
    thresh = img.max()/2.5
    for x in range(width):
        for y in range(height):
            ax.annotate(str(round(img[x][y],2)), xy=(y,x),
                        color='white' if img[x][y]<thresh else 'black')

fig = plt.figure(figsize = (15,15)) 
ax = fig.add_subplot(111, xticks=[], yticks=[])

visualize_input(X_train[0], ax)
In [14]:
# Scaling and shaping the images
X_train = X_train/255
X_test = X_test/255

X_train = X_train.reshape(-1,32,32,1).astype('float32')
X_test = X_test.reshape(-1,32,32,1).astype('float32')

Model Architecture

In [15]:
# Defiining Early Stopping function to monitor Validation Loss
es = EarlyStopping(monitor='val_loss', patience=5)
In [16]:
# Defining Model Architecture
model = Sequential()





model.add(Dense(36, activation='relu'))
model.add(Dense(28, activation='sigmoid'))
Model: "sequential"
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 30, 30, 32)        320       
max_pooling2d (MaxPooling2D) (None, 15, 15, 32)        0         
conv2d_1 (Conv2D)            (None, 13, 13, 32)        9248      
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 32)          0         
conv2d_2 (Conv2D)            (None, 4, 4, 32)          9248      
max_pooling2d_2 (MaxPooling2 (None, 2, 2, 32)          0         
dropout (Dropout)            (None, 2, 2, 32)          0         
flatten (Flatten)            (None, 128)               0         
dense (Dense)                (None, 36)                4644      
dense_1 (Dense)              (None, 36)                1332      
dropout_1 (Dropout)          (None, 36)                0         
dense_2 (Dense)              (None, 28)                1036      
Total params: 25,828
Trainable params: 25,828
Non-trainable params: 0
In [17]:
# Compile the model

Model Training

In [18]:
# Run the model for a batch size of 50 for 100 epochs
history =, 
                    validation_data = (X_test, y_test),
                    batch_size = 50,
                    epochs = 100,
                    callbacks = [es]
In [19]:
# Function to plot "accuracy vs epoch" graphs and "loss vs epoch" graphs for training and validation data
def plot_metrics(model_name, metric = 'accuracy'):
    if metric == 'loss':
        plt.title("Loss Values")
        plt.plot(model_name.history['loss'], label = 'train')
        plt.plot(model_name.history['val_loss'], label = 'test')
        plt.title("Accuracy Values")
        plt.plot(model_name.history['accuracy'], label='train') 
        plt.plot(model_name.history['val_accuracy'], label='test') 
In [20]:
plot_metrics(history, 'accuracy')
plot_metrics(history, 'loss')
In [21]:
# Saving our trained model
from tensorflow.keras.models import save_model
if os.path.isfile('best_model.h5') is False:'best_model.h5')

Evaluating Model Performance

In [22]:
#Plotting a confusion matrix for checking the performance of our model
Y_pred = np.argmax(model.predict(X_test), axis = 1)
cnf = confusion_matrix(y_test.argmax(axis = 1), Y_pred)

df_cnf = pd.DataFrame(cnf, range(28), range(28))
sns.set(font_scale = 2)
plt.figure(figsize = (25, 20))
sns.heatmap(df_cnf, annot = True, linewidths = 0.8, fmt = '0.3g', cbar = False)
plt.title("Confusion Matrix")
plt.xlabel("True Values")
plt.ylabel("Prediction Values")

Making Predictions on One Label

In [23]:
pred = np.argmax(model.predict(np.expand_dims(X_test[7], axis = 0)))
preds = "Prediction: " + str(pred)
plt.figure(figsize = (7,7))
actual_label = np.argmax(y_test[7]) + 1

plt.suptitle("Actual Label " + str(actual_label))
Text(0.5, 0.98, 'Actual Label 3')
In [24]:
# Saving our trained model
from tensorflow.keras.models import save_model
if os.path.isfile('best_model.h5') is False:'best_model.h5')
In [25]:
from tensorflow.keras import models
model = models.load_model('best_model.h5')
In [26]:
!deepCC best_model.h5
Reading [keras model] 'best_model.h5'
Saved 'best_model_deepC/best_model.onnx'
Reading [onnx model] 'best_model_deepC/best_model.onnx'
Model info:
  ir_vesion : 4
  doc       : 
[ONNX]: terminal (input/output) conv2d_input's shape is less than 1. Changing it to 1.
[ONNX]: terminal (input/output) dense_2's shape is less than 1. Changing it to 1.
Running DNNC graph sanity check ...
Passed sanity check.
Writing C++ file 'best_model_deepC/best_model.cpp'
deepSea model files are ready in 'best_model_deepC/' 
g++ -std=c++11 -O3 -fno-rtti -fno-exceptions -I. -I/opt/tljh/user/lib/python3.7/site-packages/deepC-0.13-py3.7-linux-x86_64.egg/deepC/include -isystem /opt/tljh/user/lib/python3.7/site-packages/deepC-0.13-py3.7-linux-x86_64.egg/deepC/packages/eigen-eigen-323c052e1731 "best_model_deepC/best_model.cpp" -D_AITS_MAIN -o "best_model_deepC/best_model.exe"
size "best_model_deepC/best_model.exe"
   text	   data	    bss	    dec	    hex	filename
 283123	   3960	    760	 287843	  46463	best_model_deepC/best_model.exe
Saved model as executable "best_model_deepC/best_model.exe"