Cainvas

Import Library and Dataset

In [77]:
!pip install wget
import numpy as np 
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.model_selection import StratifiedShuffleSplit,train_test_split
import glob
import tqdm
import tensorflow as tf
from sklearn import metrics
from sklearn import preprocessing
from tensorflow import keras
sns.set_style('darkgrid')
!wget -N 'https://cainvas-static.s3.amazonaws.com/media/user_data/DevanshChowd/signature.zip'
!unzip signature.zip
Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: wget in ./.local/lib/python3.7/site-packages (3.2)

Images in folders with the suffix _forg are forged.

Images in folders without this suffix are authentic, i.e. signed by the genuine person.
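The label for each image can therefore be read straight from its folder name. A minimal sketch of that rule (label_from_folder is an illustrative helper, not part of the notebook; the same check appears inside get_data further down):

def label_from_folder(folder_name):
    # Folders ending in _forg hold forgeries (label 1); all others are authentic (label 0)
    return 1 if 'forg' in folder_name.lower() else 0

assert label_from_folder('001_forg') == 1
assert label_from_folder('001') == 0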

In [11]:
display(sorted(glob.glob('train_data/*'))[:4])
print('Each Folder has', len(glob.glob('train_data/001/*')), 'Authentic Images and Folders with suffix forg have', len(glob.glob('train_data/001_forg/*')), 'Images.')
print('Training Size is ',len(glob.glob('train_data/*/*')),'Validation Size is ',len(glob.glob('validation_data/*/*')),'Test Size is ',len(glob.glob('test_data/*/*')))
['train_data/001',
 'train_data/001_forg',
 'train_data/002',
 'train_data/002_forg']
Each Folder has 12 Authentic Images and Folders with suffix forg have 4 Images.
Training Size is  826 Validation Size is  413 Test Size is  410
In [10]:
plt.imshow(plt.imread('train_data/001/001_01.PNG'))
Out[10]:
<matplotlib.image.AxesImage at 0x7f5743a23a58>
In [22]:
def get_data(directory, labels):
    features = []
    targets = []
    for name in tqdm.tqdm(sorted(os.listdir(directory))):
        for image_name in sorted(os.listdir(os.path.join(directory, name))):
            # Read each signature image as a single-channel (grayscale) array
            img = cv2.imread(os.path.join(directory, name, image_name), cv2.IMREAD_GRAYSCALE)
            features.append(img)
            if labels:
                # Folders ending in _forg hold forgeries (label 1); all others are authentic (label 0)
                if 'forg' in name.lower():
                    targets.append(1)
                else:
                    targets.append(0)
    if labels:
        return np.array(features), np.array(targets)
    else:
        return np.array(features)
train_features, train_labels = get_data('train_data', True)
validation_features, validation_labels = get_data('validation_data', True)
test_features, test_labels = get_data('test_data', True)
print(train_features.shape,validation_features.shape,test_features.shape)
train_features = train_features.reshape((826, 268, 650, 1))
train_features = train_features.astype('float32') / 255
validation_features = validation_features.reshape((413, 268, 650, 1))
validation_features = validation_features.astype('float32') / 255
test_features = test_features.reshape((410, 268, 650, 1))
test_features = test_features.astype('float32') / 255
100%|██████████| 128/128 [00:01<00:00, 109.81it/s]
100%|██████████| 128/128 [00:00<00:00, 201.96it/s]
100%|██████████| 128/128 [00:00<00:00, 195.93it/s]
(826, 268, 650) (413, 268, 650) (410, 268, 650)
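The reshapes above hardcode the sample counts and the 268x650 resolution. An equivalent, shape-agnostic alternative (a sketch only; the notebook itself ran the explicit reshapes) adds the channel axis with np.expand_dims and scales pixel values to [0, 1]:

def add_channel_and_scale(images):
    # (N, 268, 650) -> (N, 268, 650, 1), pixel values scaled from [0, 255] to [0, 1]
    return np.expand_dims(images, -1).astype('float32') / 255

# e.g. train_features = add_channel_and_scale(train_features)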
In [32]:
def get_model():
    keras.backend.clear_session()
    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(268, 650, 1),padding='same'))
    model.add(keras.layers.MaxPooling2D((2, 2),padding='same'))
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu',padding='same'))
    model.add(keras.layers.MaxPooling2D((2, 2),padding='same'))
    model.add(keras.layers.Conv2D(64, (3, 3), activation='relu',padding='same'))
    model.add(keras.layers.MaxPooling2D((2, 2),padding='same'))
    model.add(keras.layers.Conv2D(64, (3, 3), activation='relu',padding='same'))
    model.add(keras.layers.MaxPooling2D((2, 2),padding='same'))
    model.add(keras.layers.Conv2D(64, (3, 3), activation='relu',padding='same'))
    model.add(keras.layers.MaxPooling2D((2, 2),padding='same'))
    model.add(keras.layers.Conv2D(64, (3, 3), activation='relu',padding='same'))
    model.add(keras.layers.MaxPooling2D((2, 2),padding='same'))
    model.add(keras.layers.Conv2D(64, (3, 3), activation='relu',padding='same'))
    model.add(keras.layers.MaxPooling2D((2, 2),padding='same'))
    model.add(keras.layers.Conv2D(64, (3, 3), activation='relu',padding='same'))
    model.add(keras.layers.MaxPooling2D((2, 2),padding='same'))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(32, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['accuracy'])
    model.summary()
    return model
model = get_model()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 268, 650, 32)      320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 134, 325, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 134, 325, 32)      9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 67, 163, 32)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 67, 163, 64)       18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 34, 82, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 34, 82, 64)        36928     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 17, 41, 64)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 17, 41, 64)        36928     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 9, 21, 64)         0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 9, 21, 64)         36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 5, 11, 64)         0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 5, 11, 64)         36928     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 3, 6, 64)          0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 3, 6, 64)          36928     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 2, 3, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 384)               0         
_________________________________________________________________
dense (Dense)                (None, 32)                12320     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
=================================================================
Total params: 225,057
Trainable params: 225,057
Non-trainable params: 0
_________________________________________________________________
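The parameter counts follow directly from the layer shapes: a Conv2D layer with a 3x3 kernel has (3*3*in_channels + 1)*filters weights, so the first layer contributes (3*3*1 + 1)*32 = 320 and each of the later 64-filter layers (3*3*64 + 1)*64 = 36,928; the Dense layers add (384 + 1)*32 = 12,320 and (32 + 1)*1 = 33, giving the 225,057 total shown above.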
In [34]:
model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['accuracy'])
early_stop = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)
red_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', patience=3, factor=0.75)
model_check = keras.callbacks.ModelCheckpoint('signature.h5', save_best_only=True)
history = model.fit(train_features, train_labels ,validation_data=(validation_features, validation_labels),epochs=30,callbacks = [early_stop,model_check,red_lr])
Epoch 1/30
 2/26 [=>............................] - ETA: 1s - loss: 0.6967 - accuracy: 0.4375WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0412s vs `on_train_batch_end` time: 0.0719s). Check your callbacks.
26/26 [==============================] - 4s 173ms/step - loss: 0.6935 - accuracy: 0.5194 - val_loss: 0.6921 - val_accuracy: 0.5375
Epoch 2/30
26/26 [==============================] - 3s 130ms/step - loss: 1.0469 - accuracy: 0.6840 - val_loss: 0.2980 - val_accuracy: 0.9201
Epoch 3/30
26/26 [==============================] - 3s 129ms/step - loss: 0.1682 - accuracy: 0.9588 - val_loss: 1.4126 - val_accuracy: 0.5642
Epoch 4/30
26/26 [==============================] - 3s 128ms/step - loss: 0.2496 - accuracy: 0.9092 - val_loss: 0.5117 - val_accuracy: 0.9007
Epoch 5/30
26/26 [==============================] - 3s 129ms/step - loss: 0.0709 - accuracy: 0.9818 - val_loss: 0.4107 - val_accuracy: 0.9056
Epoch 6/30
26/26 [==============================] - 3s 129ms/step - loss: 0.1562 - accuracy: 0.9588 - val_loss: 0.4255 - val_accuracy: 0.9104
Epoch 7/30
26/26 [==============================] - 3s 129ms/step - loss: 0.0525 - accuracy: 0.9843 - val_loss: 1.1616 - val_accuracy: 0.8814
Epoch 8/30
26/26 [==============================] - 3s 130ms/step - loss: 0.0541 - accuracy: 0.9818 - val_loss: 0.4643 - val_accuracy: 0.9128
Epoch 9/30
26/26 [==============================] - 3s 129ms/step - loss: 0.0315 - accuracy: 0.9891 - val_loss: 0.3634 - val_accuracy: 0.8983
Epoch 10/30
26/26 [==============================] - 3s 130ms/step - loss: 0.0168 - accuracy: 0.9939 - val_loss: 0.7934 - val_accuracy: 0.9056
Epoch 11/30
26/26 [==============================] - 3s 130ms/step - loss: 0.0292 - accuracy: 0.9891 - val_loss: 1.0185 - val_accuracy: 0.8983
Epoch 12/30
26/26 [==============================] - 3s 130ms/step - loss: 0.0153 - accuracy: 0.9952 - val_loss: 1.3521 - val_accuracy: 0.8935
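Although epochs=30 was requested, training stops after epoch 12: val_accuracy peaked at 0.9201 in epoch 2 and did not improve for the next 10 epochs, so the EarlyStopping callback fires, and restore_best_weights=True rolls the model back to the epoch-2 weights. This matches the validation accuracy reported by model.evaluate below.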
In [82]:
fig = plt.figure()
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
fig.savefig('Accuracy vs Epoch.png')
In [83]:
fig = plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
fig.savefig('Loss vs Epoch.png')
In [37]:
model.evaluate(train_features, train_labels)
model.evaluate(validation_features, validation_labels)
model.evaluate(test_features, test_labels)
26/26 [==============================] - 1s 39ms/step - loss: 0.2028 - accuracy: 0.9661
13/13 [==============================] - 0s 36ms/step - loss: 0.2980 - accuracy: 0.9201
13/13 [==============================] - 0s 36ms/step - loss: 0.3154 - accuracy: 0.8951
Out[37]:
[0.3153679072856903, 0.8951219320297241]
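Accuracy alone does not show how the errors split between authentic and forged signatures. A short sketch of a fuller evaluation, assuming the trained model and test arrays from the cells above (both functions come from the already-imported sklearn metrics module):

test_pred = (model.predict(test_features) > 0.5).astype('int32').ravel()
# Rows are true classes (0 = authentic, 1 = forged), columns are predicted classes
print(metrics.confusion_matrix(test_labels, test_pred))
print(metrics.classification_report(test_labels, test_pred, target_names=['authentic', 'forged']))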
In [70]:
indices = np.random.randint(len(test_features), size=15)
demo, demo_labels = test_features[indices], test_labels[indices]
predictions = (model.predict(demo) > .5).astype('int32')
In [80]:
fig,ax = plt.subplots(3,5,figsize=(15,7))
plt.tick_params(left = False, bottom = False)
for i in range(3):
    for j in range(5):
        ax[i][j].imshow(demo[i*5+j].squeeze())
        ax[i][j].tick_params(left=False, bottom=False)
        ax[i][j].set_xticks([])
        ax[i][j].set_yticks([])
        ax[i][j].set_title("Truth Label "+str(demo_labels[i*5+j])+"   \n PREDICTED   "+str(predictions[i*5+j][0]))
fig.savefig('Predict.png')
In [81]:
model.save('signature.h5')
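To confirm the saved file round-trips correctly, one can reload it and compare its predictions with the in-memory model; a brief sketch assuming the signature.h5 file written above:

restored = keras.models.load_model('signature.h5')
# The restored model should reproduce the in-memory model's predictions
print(np.allclose(restored.predict(test_features[:5]), model.predict(test_features[:5])))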