Cainvas
Model Files
Spider_Breed_Classification.h5
keras
Model
deepSea Compiled Models
Spider_Breed_Classification.e…
deepSea
Ubuntu

SPIDER BREED CLASSIFICATION

Credit: AITS Cainvas Community

Photo by MinooIravani on Dribbble

Import Library

In [1]:
import numpy as np 
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.model_selection import StratifiedShuffleSplit,train_test_split
import glob
import tensorflow as tf
from sklearn import metrics
from tensorflow import keras
!wget -N 'https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/spiders.tar.gz'
import tarfile
# Unpack the downloaded dataset into the working directory.
# The context manager guarantees the archive handle is closed even if
# extraction fails part-way through.
# NOTE(review): extractall() trusts the member paths inside the archive;
# this is acceptable only as long as the download source is trusted.
with tarfile.open('spiders.tar.gz') as archive:
    archive.extractall('./')
--2021-07-05 06:50:53--  https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/spiders.tar.gz
Resolving cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)... 52.219.64.36
Connecting to cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)|52.219.64.36|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10012354 (9.5M) [application/x-tar]
Saving to: ‘spiders.tar.gz’

spiders.tar.gz      100%[===================>]   9.55M  --.-KB/s    in 0.07s   

2021-07-05 06:50:53 (135 MB/s) - ‘spiders.tar.gz’ saved [10012354/10012354]

Basic Analysis

In [2]:
# Count the image files shipped in each pre-made split folder.
n_train = len(glob.glob('spiders/train/*/*'))
n_valid = len(glob.glob('spiders/valid/*/*'))
n_test = len(glob.glob('spiders/test/*/*'))
print("Training Sample", n_train, "Validation Sample", n_valid, "Testing Sample", n_test)

# Load the index file and normalise Windows-style separators in the paths.
data = pd.read_csv('spiders/spiders.csv')
data['filepaths'] = data['filepaths'].map(lambda p: p.replace("\\", "/"))

# The first path component is kept as the 'category' column.
data['category'] = [p.split("/")[0] for p in data['filepaths']]

# Class balance overview: one horizontal bar per label.
sns.set_style('darkgrid')
sns.countplot(data = data, y = 'labels')

# Prefix every path with the dataset root so it resolves from the notebook directory.
path = 'spiders'
data['filepaths'] = data['filepaths'].map(lambda p: os.path.join(path, p))
Training Sample 366 Validation Sample 10 Testing Sample 10
In [3]:
def get_img(index):
    """Read and return the image stored at positional row `index` of `data`.

    The file location is taken from the 'filepaths' column of the
    module-level `data` frame and loaded with `plt.imread`.
    """
    return plt.imread(data.iloc[index]['filepaths'])
# Pick the first five rows of each of the first two labels found in `data`.
label_values = data['labels'].unique()
demo = np.append(
    np.array(data[data['labels'] == label_values[0]].index[:5]),
    np.array(data[data['labels'] == label_values[1]].index[:5]),
)

# 2 x 5 grid: top row shows the first label, bottom row the second.
fig, ax = plt.subplots(2, 5, figsize=(15, 10))
plt.tick_params(left = False, bottom = False)
for position, panel in enumerate(ax.ravel()):
    row_id = demo[position]
    panel.imshow(get_img(row_id))
    # Hide ticks so only the picture and its label remain.
    panel.tick_params(left = False, bottom = False)
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_title(data.iloc[row_id].labels)
fig.savefig('Sample.png')

Model

In [4]:
def get_model(dim = (224,224,3), num_classes = 2, dropout_rate = .4):
    """Build (and print the summary of) the CNN used for classification.

    Architecture: two 3x3 convolutions, then three 5x5 convolution stages
    (32, 64, 64 filters), each followed by 2x2 max-pooling and dropout,
    then a flattened softmax head.

    Parameters
    ----------
    dim : tuple
        Input image shape passed as `input_shape` to the first layer.
    num_classes : int
        Number of units in the softmax output layer. Defaults to 2, the
        value that was previously hard-coded.
    dropout_rate : float
        Rate used by every Dropout layer. Defaults to 0.4, the value that
        was previously hard-coded.

    Returns
    -------
    keras.models.Sequential
        The uncompiled model.

    Raises
    ------
    ValueError
        If `num_classes` is smaller than 2 (a single-unit softmax always
        outputs 1 and cannot discriminate between classes).
    """
    if num_classes < 2:
        raise ValueError("num_classes must be at least 2, got %r" % (num_classes,))

    # Clear Keras' global state before building a fresh model.
    keras.backend.clear_session()
    model = keras.models.Sequential()

    # Stem: two stacked 3x3 convolutions before the first down-sampling.
    model.add(keras.layers.Conv2D(32,(3,3),activation='relu',input_shape=dim))
    model.add(keras.layers.Conv2D(32,(3,3),activation='relu',padding="valid"))
    model.add(keras.layers.MaxPooling2D((2,2)))
    model.add(keras.layers.Dropout(dropout_rate))

    # Three identical-shaped stages: 5x5 conv -> 2x2 pool -> dropout.
    for filters in (32, 64, 64):
        model.add(keras.layers.Conv2D(filters,(5,5),activation='relu',padding="valid"))
        model.add(keras.layers.MaxPooling2D((2,2)))
        model.add(keras.layers.Dropout(dropout_rate))

    # Classification head.
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dropout(dropout_rate))
    model.add(keras.layers.Dense(num_classes,activation='softmax'))
    model.summary()
    return model
# Fixed seed so the train/validation/test partition (and therefore every
# metric reported below) is reproducible across Restart & Run All.
SPLIT_SEED = 42

# Hold out exactly 40 images for testing (an integer test_size is an absolute
# sample count), stratified so both labels keep their proportions.
train_val,test = train_test_split(data,stratify=data['labels'],test_size=40,shuffle=True,random_state=SPLIT_SEED)
# Then reserve 20% of the remainder for validation, again stratified.
train,val  = train_test_split(train_val,stratify=train_val['labels'],test_size=.2,shuffle=True,random_state=SPLIT_SEED)

# Re-number rows from 0 in each split; assignment instead of inplace=True
# keeps the cell idempotent.
train = train.reset_index(drop=True)
val = val.reset_index(drop=True)
test = test.reset_index(drop=True)
In [5]:
# Build the network once purely to inspect it: get_model() prints the layer
# summary itself, and the returned (unassigned) model is echoed as the cell output.
get_model()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 222, 222, 32)      896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 220, 220, 32)      9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 110, 110, 32)      0         
_________________________________________________________________
dropout (Dropout)            (None, 110, 110, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 106, 106, 32)      25632     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 53, 53, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 53, 53, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 49, 49, 64)        51264     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 24, 24, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 24, 24, 64)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 20, 20, 64)        102464    
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 10, 10, 64)        0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 10, 10, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 6400)              0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 6400)              0         
_________________________________________________________________
dense (Dense)                (None, 2)                 12802     
=================================================================
Total params: 202,306
Trainable params: 202,306
Non-trainable params: 0
_________________________________________________________________
Out[5]:
<tensorflow.python.keras.engine.sequential.Sequential at 0x7f0a411f3a90>
In [6]:
# Training pipeline: rescale pixels to [0, 1] and apply random flips as augmentation.
datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1/255.,
                                  horizontal_flip = True,
                                  vertical_flip=True)
# Evaluation pipeline: rescale only, no random transforms.
datagen_scale = keras.preprocessing.image.ImageDataGenerator(rescale=1/255)

train_generator= datagen.flow_from_dataframe(train,x_col ='filepaths',y_col='labels',target_size=(224,224),batch_size=64,shuffle=True)
# Validation and test images must not be randomly flipped: augmentation there
# makes val_accuracy (which drives early stopping and LR reduction) noisy and
# non-repeatable. Previously these two used `datagen`, leaving `datagen_scale` unused.
valid_generator= datagen_scale.flow_from_dataframe(val,x_col ='filepaths',y_col='labels',target_size=(224,224),shuffle=False)
test_generator = datagen_scale.flow_from_dataframe(test,x_col ='filepaths',y_col='labels',target_size=(224,224),shuffle=False)
Found 276 validated image filenames belonging to 2 classes.
Found 70 validated image filenames belonging to 2 classes.
Found 40 validated image filenames belonging to 2 classes.
In [7]:
model = get_model()
# The network ends in a multi-unit softmax and the generators yield one-hot
# labels, so categorical cross-entropy is the matching loss (for two classes it
# gives the same value as the previous binary_crossentropy, and it stays
# correct if more classes are added).
model.compile(loss='categorical_crossentropy',metrics=['accuracy'],optimizer=keras.optimizers.Adam(learning_rate=.001))

# Stop after 5 epochs without a val_accuracy improvement and roll back to the best weights.
early_stop = keras.callbacks.EarlyStopping(monitor= 'val_accuracy',patience=5,restore_best_weights=True)
# Multiply the learning rate by 0.75 after 3 stagnant epochs.
red_lr = keras.callbacks.ReduceLROnPlateau(patience=3,monitor= 'val_accuracy',factor=0.75)
# Monitor the same quantity as early stopping so the file on disk matches the
# weights restored in memory (the default monitor would be val_loss).
model_check = keras.callbacks.ModelCheckpoint("cnn.h5",monitor='val_accuracy',save_best_only=True)

history = model.fit(train_generator,validation_data=valid_generator,epochs=200,callbacks = [early_stop,model_check,red_lr])
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 222, 222, 32)      896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 220, 220, 32)      9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 110, 110, 32)      0         
_________________________________________________________________
dropout (Dropout)            (None, 110, 110, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 106, 106, 32)      25632     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 53, 53, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 53, 53, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 49, 49, 64)        51264     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 24, 24, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 24, 24, 64)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 20, 20, 64)        102464    
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 10, 10, 64)        0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 10, 10, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 6400)              0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 6400)              0         
_________________________________________________________________
dense (Dense)                (None, 2)                 12802     
=================================================================
Total params: 202,306
Trainable params: 202,306
Non-trainable params: 0
_________________________________________________________________
Epoch 1/200
5/5 [==============================] - 2s 358ms/step - loss: 0.7151 - accuracy: 0.5471 - val_loss: 0.6843 - val_accuracy: 0.6143
Epoch 2/200
5/5 [==============================] - 1s 173ms/step - loss: 0.6376 - accuracy: 0.6703 - val_loss: 0.6635 - val_accuracy: 0.8571
Epoch 3/200
5/5 [==============================] - 1s 187ms/step - loss: 0.6591 - accuracy: 0.6232 - val_loss: 0.6394 - val_accuracy: 0.7143
Epoch 4/200
5/5 [==============================] - 1s 177ms/step - loss: 0.5373 - accuracy: 0.7609 - val_loss: 0.5502 - val_accuracy: 0.7714
Epoch 5/200
5/5 [==============================] - 1s 141ms/step - loss: 0.4828 - accuracy: 0.7826 - val_loss: 0.5128 - val_accuracy: 0.8857
Epoch 6/200
5/5 [==============================] - 1s 161ms/step - loss: 0.4214 - accuracy: 0.8225 - val_loss: 0.4846 - val_accuracy: 0.9143
Epoch 7/200
5/5 [==============================] - 1s 152ms/step - loss: 0.3733 - accuracy: 0.8623 - val_loss: 0.5669 - val_accuracy: 0.8429
Epoch 8/200
5/5 [==============================] - 1s 164ms/step - loss: 0.3171 - accuracy: 0.9022 - val_loss: 0.3331 - val_accuracy: 0.9571
Epoch 9/200
5/5 [==============================] - 1s 165ms/step - loss: 0.2678 - accuracy: 0.8986 - val_loss: 0.3141 - val_accuracy: 0.9143
Epoch 10/200
5/5 [==============================] - 1s 154ms/step - loss: 0.2056 - accuracy: 0.9312 - val_loss: 0.2157 - val_accuracy: 0.9714
Epoch 11/200
5/5 [==============================] - 1s 159ms/step - loss: 0.2782 - accuracy: 0.8659 - val_loss: 0.1888 - val_accuracy: 0.9857
Epoch 12/200
5/5 [==============================] - 1s 147ms/step - loss: 0.2370 - accuracy: 0.9058 - val_loss: 0.2055 - val_accuracy: 0.9714
Epoch 13/200
5/5 [==============================] - 1s 186ms/step - loss: 0.2481 - accuracy: 0.9203 - val_loss: 0.4527 - val_accuracy: 0.7714
Epoch 14/200
5/5 [==============================] - 1s 163ms/step - loss: 0.2856 - accuracy: 0.8659 - val_loss: 0.2870 - val_accuracy: 0.9857
Epoch 15/200
5/5 [==============================] - 1s 152ms/step - loss: 0.1870 - accuracy: 0.9384 - val_loss: 0.2804 - val_accuracy: 0.9571
Epoch 16/200
5/5 [==============================] - 1s 164ms/step - loss: 0.1783 - accuracy: 0.9348 - val_loss: 0.2444 - val_accuracy: 0.9286
In [8]:
# Training vs. validation accuracy per epoch, drawn on an explicit Axes.
fig, accuracy_ax = plt.subplots()
for series_key in ('accuracy', 'val_accuracy'):
    accuracy_ax.plot(history.history[series_key])
accuracy_ax.set_title('model accuracy')
accuracy_ax.set_ylabel('accuracy')
accuracy_ax.set_xlabel('epoch')
accuracy_ax.legend(['train', 'validation'], loc='upper left')
plt.show()
fig.savefig('Accuracy vs Epoch.png')
In [9]:
# Training vs. validation loss per epoch.
# A dedicated figure is created here: previously this cell reused the `fig`
# left over from the accuracy cell, so 'Loss vs Epoch.png' was written with
# the accuracy curves instead of the loss curves.
fig, loss_ax = plt.subplots()
loss_ax.plot(history.history['loss'])
loss_ax.plot(history.history['val_loss'])
loss_ax.set_title('model loss')
loss_ax.set_ylabel('loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend(['train', 'validation'], loc='upper left')
plt.show()
fig.savefig('Loss vs Epoch.png')

EVALUATE

In [10]:
# Rebuild all three generators with rescaling only, then score each split.
datagen_scale = keras.preprocessing.image.ImageDataGenerator(rescale=1/255)

# Arguments shared by every flow_from_dataframe call below.
frame_args = dict(x_col='filepaths', y_col='labels', target_size=(224,224))
train_generator = datagen_scale.flow_from_dataframe(train, batch_size=64, shuffle=True, **frame_args)
valid_generator = datagen_scale.flow_from_dataframe(val, shuffle=False, **frame_args)
test_generator = datagen_scale.flow_from_dataframe(test, shuffle=False, **frame_args)

for split_generator in (train_generator, valid_generator):
    model.evaluate(split_generator)
# Kept as the last expression so the test [loss, accuracy] pair is the cell output.
model.evaluate(test_generator)
Found 276 validated image filenames belonging to 2 classes.
Found 70 validated image filenames belonging to 2 classes.
Found 40 validated image filenames belonging to 2 classes.
5/5 [==============================] - 0s 88ms/step - loss: 0.2596 - accuracy: 0.9094
3/3 [==============================] - 0s 12ms/step - loss: 0.1892 - accuracy: 0.9857
2/2 [==============================] - 0s 44ms/step - loss: 0.3140 - accuracy: 0.8250
Out[10]:
[0.31404903531074524, 0.824999988079071]

PREDICT

In [11]:
# Select five test rows for each of the first two labels seen in `test`.
test_labels = test['labels'].unique()
picked_rows = np.append(
    np.array(test[test['labels'] == test_labels[0]].index[:5]),
    np.array(test[test['labels'] == test_labels[1]].index[:5]),
)
# Keep the original row numbers as an 'index' column while renumbering from 0.
demo = test.iloc[picked_rows].reset_index(drop=False)

# Rescale-only generator over the selected rows, order preserved.
datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1/255)
test_generator = datagen.flow_from_dataframe(demo,x_col ='filepaths',y_col='labels',target_size=(224,224),shuffle=False)

# Arg-max over the softmax outputs gives a class index per image ...
predicted_index = pd.Series(np.argmax(model.predict(test_generator),axis=1),name='Prediction')
# ... which is translated back to its label by inverting class_indices.
index_to_label = {idx: label for label, idx in test_generator.class_indices.items()}
demo = pd.concat((demo, predicted_index.map(index_to_label)),axis=1)
Found 10 validated image filenames belonging to 2 classes.
In [12]:
# 2 x 5 grid of the demo images, each titled with its true and predicted label.
fig, ax = plt.subplots(2, 5, figsize=(20, 10))
plt.tick_params(left = False, bottom = False)
for position, panel in enumerate(ax.ravel()):
    sample = demo.iloc[position]
    panel.imshow(plt.imread(sample.filepaths))
    # Strip ticks; only the image and its caption are of interest.
    panel.tick_params(left = False, bottom = False)
    panel.set_xticks([])
    panel.set_yticks([])
    panel.set_title("Truth Label "+sample.labels+"   \n PREDICTED   "+sample.Prediction)
fig.savefig('Predict.png')