SPIDER BREED CLASSIFICATION¶
Import Libraries¶
In [1]:
!pip install wget
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.model_selection import StratifiedShuffleSplit,train_test_split
import glob
import tensorflow as tf
from sklearn import metrics
from tensorflow import keras
!wget -N 'https://cainvas-static.s3.amazonaws.com/media/user_data/devanshchowd/spiders.tar.gz'
import tarfile
file = tarfile.open('spiders.tar.gz')
file.extractall('./')
file.close()
Basic Analysis¶
In [2]:
# Count the entries found two directory levels below each split folder.
train_count = len(glob.glob('spiders/train/*/*'))
valid_count = len(glob.glob('spiders/valid/*/*'))
test_count = len(glob.glob('spiders/test/*/*'))
print("Training Sample", train_count, "Validation Sample", valid_count, "Testing Sample", test_count)

data = pd.read_csv('spiders/spiders.csv')

# Turn backslash separators into forward slashes so the paths can be split on "/".
forward_slash_paths = data.filepaths.apply(lambda raw: raw.replace("\\", "/"))
data.filepaths = forward_slash_paths

# The first path component is stored as the row's category.
data['category'] = data.filepaths.apply(lambda rel: rel.split("/")[0])

# Per-label row counts, drawn as horizontal bars.
sns.set_style('darkgrid')
sns.countplot(data=data, y='labels')

# Prefix every path with the dataset root folder.
path = 'spiders'
rooted_paths = data.filepaths.apply(lambda rel: os.path.join(path, rel))
data.filepaths = rooted_paths
In [3]:
def get_img(index):
    """Read and return the image stored at positional row `index` of `data`.

    The file location is taken from the `filepaths` column of the
    notebook-level `data` frame and loaded with `plt.imread`.
    """
    image_path = data.iloc[index]['filepaths']
    return plt.imread(image_path)
# Pick the first five rows of each of the first two labels encountered in `data`.
seen_labels = data['labels'].unique()
first_label_rows = data[data['labels'] == seen_labels[0]].index[:5]
second_label_rows = data[data['labels'] == seen_labels[1]].index[:5]
demo = np.append(np.array(first_label_rows), np.array(second_label_rows))

# 2 x 5 grid: one row of panels per label, ticks hidden so only the images show.
fig, ax = plt.subplots(2, 5, figsize=(15, 10))
plt.tick_params(left=False, bottom=False)
for i in range(2):
    for j in range(5):
        panel = ax[i][j]
        row_position = demo[i * 5 + j]
        panel.imshow(get_img(row_position))
        panel.tick_params(left=False, bottom=False)
        panel.set_xticks([])
        panel.set_yticks([])
        panel.set_title(data.iloc[row_position].labels)
fig.savefig('Sample.png')
Model¶
In [4]:
def get_model(dim = (224,224,3), num_classes = 2):
    """Build (but do not compile) the CNN classifier and print its summary.

    Args:
        dim: Input shape passed to the first convolution as `input_shape`.
        num_classes: Number of units in the final softmax layer. Defaults to 2,
            which reproduces the original hard-coded architecture.

    Returns:
        An uncompiled `keras.models.Sequential` model.

    Side effects:
        Clears the Keras backend session before building and prints
        `model.summary()`.
    """
    keras.backend.clear_session()
    model = keras.models.Sequential()

    # Stem: two 3x3 convolutions followed by pooling and dropout.
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=dim))
    model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', padding="valid"))
    model.add(keras.layers.MaxPooling2D((2, 2)))
    model.add(keras.layers.Dropout(.4))

    # Three identical 5x5 conv -> pool -> dropout stages; only the filter count varies.
    for filters in (32, 64, 64):
        model.add(keras.layers.Conv2D(filters, (5, 5), activation='relu', padding="valid"))
        model.add(keras.layers.MaxPooling2D((2, 2)))
        model.add(keras.layers.Dropout(.4))

    # Classification head.
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dropout(.4))
    model.add(keras.layers.Dense(num_classes, activation='softmax'))

    model.summary()
    return model
# A fixed seed makes the split, and every metric reported below, reproducible
# across kernel restarts.
SPLIT_SEED = 42

# Hold out 40 rows (an integer test_size is an absolute count) as the test set,
# then take 20% of the remainder for validation. Both splits are stratified by label.
train_val, test = train_test_split(
    data, stratify=data['labels'], test_size=40, shuffle=True, random_state=SPLIT_SEED
)
train, val = train_test_split(
    train_val, stratify=train_val['labels'], test_size=.2, shuffle=True, random_state=SPLIT_SEED
)

# Give each split a clean 0..n-1 index.
train = train.reset_index(drop=True)
val = val.reset_index(drop=True)
test = test.reset_index(drop=True)
In [5]:
# Build a throwaway instance so get_model() prints the architecture summary;
# the model that is actually trained is created again in a later cell.
get_model()
Out[5]:
In [6]:
# Training images are rescaled to [0, 1] and randomly flipped for augmentation.
datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1/255.,
                                                       horizontal_flip = True,
                                                       vertical_flip=True)
# Validation and test images are only rescaled. Augmenting them would make the
# monitored val_accuracy / val_loss depend on random flips.
datagen_scale = keras.preprocessing.image.ImageDataGenerator(rescale=1/255)

train_generator = datagen.flow_from_dataframe(train, x_col='filepaths', y_col='labels', target_size=(224,224), batch_size=64, shuffle=True)
valid_generator = datagen_scale.flow_from_dataframe(val, x_col='filepaths', y_col='labels', target_size=(224,224), shuffle=False)
test_generator = datagen_scale.flow_from_dataframe(test, x_col='filepaths', y_col='labels', target_size=(224,224), shuffle=False)
In [7]:
model = get_model()

# The network ends in a 2-unit softmax and the generators use the default
# class_mode ("categorical", i.e. one-hot labels), so categorical cross-entropy
# is the matching loss. Metrics are passed as a list, as Keras documents.
model.compile(loss='categorical_crossentropy',
              metrics=['accuracy'],
              optimizer=keras.optimizers.Adam(learning_rate=.001))

# Stop after 5 epochs without val_accuracy improvement and roll back to the best weights.
early_stop = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
# Multiply the learning rate by 0.75 after 3 epochs without val_accuracy improvement.
red_lr = keras.callbacks.ReduceLROnPlateau(patience=3, monitor='val_accuracy', factor=0.75)
# NOTE(review): no `monitor` is given here, so the checkpoint follows the Keras
# default (val_loss) rather than val_accuracy like the two callbacks above — confirm intended.
model_check = keras.callbacks.ModelCheckpoint("cnn.h5", save_best_only=True)

history = model.fit(train_generator,
                    validation_data=valid_generator,
                    epochs=200,
                    callbacks=[early_stop, model_check, red_lr])
In [8]:
# Training vs. validation accuracy per epoch, shown inline and saved to disk.
fig = plt.figure()
for curve_key in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[curve_key])
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
fig.savefig('Accuracy vs Epoch.png')
In [9]:
# Training vs. validation loss per epoch.
# This cell needs its own figure handle: without it `fig` still refers to the
# accuracy figure from the previous cell, and that is what would be written to
# 'Loss vs Epoch.png'.
fig = plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
# Save before showing so the file is written while the figure is still active.
fig.savefig('Loss vs Epoch.png')
plt.show()
EVALUATE¶
In [10]:
# Rebuild all three generators from a rescale-only ImageDataGenerator so the
# scores below are computed on un-augmented images.
datagen_scale = keras.preprocessing.image.ImageDataGenerator(rescale=1/255)
shared_flow_args = dict(x_col='filepaths', y_col='labels', target_size=(224,224))
train_generator = datagen_scale.flow_from_dataframe(train, batch_size=64, shuffle=True, **shared_flow_args)
valid_generator = datagen_scale.flow_from_dataframe(val, shuffle=False, **shared_flow_args)
test_generator = datagen_scale.flow_from_dataframe(test, shuffle=False, **shared_flow_args)

# Score train and validation first; the test score is left as the cell's final
# expression so it is the value displayed as the cell output.
for split_flow in (train_generator, valid_generator):
    model.evaluate(split_flow)
model.evaluate(test_generator)
Out[10]:
PREDICT¶
In [11]:
# Take the first five test rows of each of the first two labels found in `test`.
test_labels = test['labels'].unique()
picked_rows = np.append(
    np.array(test[test['labels'] == test_labels[0]].index[:5]),
    np.array(test[test['labels'] == test_labels[1]].index[:5]),
)
# Keep the original test position as an 'index' column next to the fresh 0..n-1 index.
demo = test.iloc[picked_rows].reset_index(drop=False)

# Rescale-only generator over the demo rows; shuffle is off so predictions stay
# aligned with the row order of `demo`.
datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1/255)
test_generator = datagen.flow_from_dataframe(demo, x_col='filepaths', y_col='labels', target_size=(224,224), shuffle=False)

# Convert arg-max class indices back to label names and attach them as 'Prediction'.
predicted_index = np.argmax(model.predict(test_generator), axis=1)
index_to_label = {idx: name for name, idx in test_generator.class_indices.items()}
predicted_label = pd.Series(predicted_index, name='Prediction').map(index_to_label)
demo = pd.concat((demo, predicted_label), axis=1)
In [12]:
# 2 x 5 grid of the demo images, each titled with its true and predicted label.
fig, ax = plt.subplots(2, 5, figsize=(20, 10))
plt.tick_params(left=False, bottom=False)
for i in range(2):
    for j in range(5):
        panel = ax[i][j]
        sample = demo.iloc[i * 5 + j]
        panel.imshow(plt.imread(sample.filepaths))
        panel.tick_params(left=False, bottom=False)
        panel.set_xticks([])
        panel.set_yticks([])
        panel.set_title("Truth Label " + sample.labels + " \n PREDICTED " + sample.Prediction)
fig.savefig('Predict.png')
In [13]:
# Persist the final trained model. This file is separate from the "cnn.h5"
# checkpoint that ModelCheckpoint writes during training.
model.save("Spider_Breed_Classification.h5")