In [1]:
!wget -N "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/data_week4.zip"
!unzip -qo data_week4.zip
!rm data_week4.zip
In [2]:
!pip install imagecorruptions
Importing Prerequisites
In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os, random, cv2, pickle, json, itertools
import imgaug.imgaug
import imgaug.augmenters as iaa
from IPython.display import SVG
from tensorflow.keras.utils import plot_model, model_to_dot
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from collections import Counter
from sklearn.utils import class_weight
from tqdm import tqdm
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (Add, Input, Conv2D, Dropout, Activation, BatchNormalization, MaxPool2D, ZeroPadding2D, AveragePooling2D, Flatten, Dense)
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, Callback, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.initializers import *
Function to plot model metrics
In [4]:
def show_final_history(history):
    plt.style.use("ggplot")
    fig, ax = plt.subplots(1, 2, figsize=(15,5))
    ax[0].set_title('Loss')
    ax[1].set_title('Accuracy')
    ax[0].plot(history.history['loss'], label='Train Loss')
    ax[0].plot(history.history['val_loss'], label='Validation Loss')
    ax[1].plot(history.history['accuracy'], label='Train Accuracy')
    ax[1].plot(history.history['val_accuracy'], label='Validation Accuracy')
    ax[0].legend(loc='upper right')
    ax[1].legend(loc='lower right')
    plt.show()
Identifying the dataset and classes
In [5]:
datasets = ['data_week4/dataset']
class_names = ["no-ship","ship"]
class_name_labels = {class_name:i for i,class_name in enumerate(class_names)}
num_classes = len(class_names)
class_name_labels
Out[5]:
{'no-ship': 0, 'ship': 1}
Loading the data
In [6]:
def load_data():
    # Read every image, convert BGR -> RGB, resize to 48x48 and scale pixels to [0,1]
    images, labels = [], []
    for dataset in datasets:
        for folder in os.listdir(dataset):
            label = class_name_labels[folder]
            for file in tqdm(os.listdir(os.path.join(dataset, folder))):
                img_path = os.path.join(dataset, folder, file)
                img = cv2.imread(img_path)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(img, (48,48))
                images.append(img)
                labels.append(label)
    images = np.array(images, dtype=np.float32)/255.0
    labels = np.array(labels, dtype=np.float32)
    return (images, labels)
In [7]:
(images, labels) = load_data()
images.shape, labels.shape
Out[7]:
Checking our data for class imbalance
In [8]:
n_labels = labels.shape[0]
_, count = np.unique(labels, return_counts=True)
df = pd.DataFrame(data = count)
df['Class Label'] = class_names
df.columns = ['Count','Class-Label']
df.set_index('Class-Label',inplace=True)
df
Out[8]:
In [9]:
df.plot.bar(rot=0)
plt.title("distribution of images per class");
Augmenting images to prevent overfitting and to correct the class imbalance in the dataset
In [10]:
def augment_add(images, seq, labels):
    # Only the minority 'ship' class (label 1) is augmented, twice per image,
    # which roughly balances the two classes.
    augmented_images, augmented_labels = [], []
    for idx, img in tqdm(enumerate(images)):
        if labels[idx] == 1:
            image_aug_1 = seq.augment_image(image=img)
            image_aug_2 = seq.augment_image(image=img)
            augmented_images.append(image_aug_1)
            augmented_images.append(image_aug_2)
            augmented_labels.append(labels[idx])
            augmented_labels.append(labels[idx])
    augmented_images = np.array(augmented_images, dtype=np.float32)
    augmented_labels = np.array(augmented_labels, dtype=np.float32)
    return (augmented_images, augmented_labels)
In [11]:
seq = iaa.Sequential([
    iaa.Fliplr(0.5),
    iaa.Crop(percent=(0, 0.1)),
    iaa.LinearContrast((0.75, 1.5)),
    iaa.Multiply((0.8, 1.2), per_channel=0.2),
    iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
        rotate=(-25, 25),
        shear=(-8, 8)
    )
], random_order=True)
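Before generating new samples, it can help to eyeball what this pipeline does to a single minority-class image. A minimal sketch, assuming the images, labels, and seq objects defined above:
# Sketch: preview one original 'ship' image next to an augmented version of it
ship_idx = int(np.argmax(labels == 1))           # index of the first ship image
augmented = seq.augment_image(image=images[ship_idx])

fig, ax = plt.subplots(1, 2, figsize=(6, 3))
ax[0].imshow(images[ship_idx])
ax[0].set_title("Original")
ax[1].imshow(np.clip(augmented, 0, 1))
ax[1].set_title("Augmented")
plt.show()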
In [12]:
(aug_images, aug_labels) = augment_add(images, seq, labels)
images = np.concatenate([images, aug_images])
labels = np.concatenate([labels, aug_labels])
In [13]:
images.shape, labels.shape
Out[13]:
In [14]:
labels = to_categorical(labels)
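For reference, to_categorical simply turns each integer label into a one-hot row, which is why the label array gains a second dimension:
# Minimal illustration of to_categorical on a toy label vector
print(to_categorical(np.array([0., 1., 1.])))
# [[1. 0.]
#  [0. 1.]
#  [0. 1.]]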
Dividing images into train, validation, and test sets
In [15]:
# Re-seeding before each shuffle applies the same permutation to both arrays,
# so every image stays paired with its label.
np.random.seed(42)
np.random.shuffle(images)
np.random.seed(42)
np.random.shuffle(labels)
In [16]:
total_count = len(images)
total_count
train = int(0.7*total_count)
val = int(0.2*total_count)
test = int(0.1*total_count)
train_images, train_labels = images[:train], labels[:train]
val_images, val_labels = images[train:(val+train)], labels[train:(val+train)]
test_images, test_labels = images[-test:], labels[-test:]
train_images.shape, val_images.shape, test_images.shape
Out[16]:
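Because the shuffle above precedes the split, each subset should roughly mirror the overall class ratio. A quick sanity check (a sketch using the split arrays; labels are already one-hot at this point):
# Sketch: class counts per split
for name, y in [("train", train_labels), ("val", val_labels), ("test", test_labels)]:
    print(name, np.bincount(y.argmax(axis=1), minlength=num_classes))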
Defining model architecture
In [17]:
model = Sequential([
    Input(shape=(48,48,3)),
    ZeroPadding2D((5,5)),
    Conv2D(16, 3, activation='relu'),
    BatchNormalization(),
    Conv2D(32, 3, activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2,2)),
    Dropout(0.3),
    Conv2D(32, 5, activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2,2)),
    Dropout(0.3),
    Conv2D(64, 3, activation='relu'),
    BatchNormalization(),
    MaxPool2D(pool_size=(2,2)),
    Dropout(0.3),
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dense(2, activation='softmax')
])
model.summary()
Defining model callbacks and compiling the model with the Adam optimizer
In [18]:
checkpoint = ModelCheckpoint(
    './base.model',
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max',
    save_weights_only=False,
    save_freq='epoch'          # `save_freq` is the supported keyword; 'epoch' saves once per epoch
)
earlystop = EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=50,
    verbose=1,
    mode='auto'
)
opt = Adam(learning_rate=1e-3)  # `lr` is the deprecated alias of `learning_rate`
callbacks = [checkpoint, earlystop]
# Labels are one-hot and the output is a 2-unit softmax,
# so categorical cross-entropy is the matching loss.
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
Training the model for 50 epochs with a batch size of 16
In [19]:
epochs = 50
batch_size = 16
history = model.fit(train_images, train_labels,
                    batch_size=batch_size,
                    steps_per_epoch=len(train_images)//batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(val_images, val_labels),
                    validation_steps=len(val_images)//batch_size,
                    callbacks=callbacks)
Loss/Accuracy vs Epoch
In [20]:
show_final_history(history)
model.save("model.h5")
print("Weights Saved")
Making predictions on the test images
In [21]:
test_pred = model.predict(test_images)
test_pred = np.argmax(test_pred, axis=1)
test_actual = np.argmax(test_labels, axis=1)

# Visualise eight random test images with their actual and predicted labels
rnd_idx = random.sample(range(len(test_images)), 8)
class_labels = {i: class_name for (class_name, i) in class_name_labels.items()}

for idx in rnd_idx:
    plt.imshow(test_images[idx])
    plt.title("Actual: {}\nPredicted: {}".format(class_labels[test_actual[idx]],
                                                 class_labels[test_pred[idx]]))
    plt.grid(None)
    plt.show()
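Beyond spot-checking individual images, the already-imported confusion_matrix gives a fuller picture of the test-set performance. A minimal sketch, assuming test_pred and test_actual from the cell above:
# Sketch: overall test accuracy plus a confusion matrix heatmap
loss, acc = model.evaluate(test_images, test_labels, verbose=0)
print("Test accuracy: {:.4f}".format(acc))

cm = confusion_matrix(test_actual, test_pred)
sns.heatmap(cm, annot=True, fmt='d',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()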
deepCC
In [22]:
!deepCC model.h5