Downloading the dataset from GitHub. Credit: Human Wildlife Conflict.¶
!git clone "https://github.com/arribada/human-wildlife-conflict.git"
Viewing dataset¶
import os, shutil    # needed by the dataset-preparation cells below

def view_dataset(path):
    print("Number of samples- ")
    for f in os.listdir(path):
        curr_dir = os.path.join(path, f)
        if os.path.isdir(curr_dir):
            print(f, "-", len(os.listdir(curr_dir)))
data_dir = 'human-wildlife-conflict/Elephant/Object/'
view_dataset(data_dir)
Number of samples-
human_and_elephant - 807
human - 1502
multiple_obstructing_elephants - 2698
multiple_separate_elephants - 1858
goat - 114
single_elephant - 7615
We will now create a new dataset with two classes: elephant and not_elephant¶
# Split the class folders into those that contain elephants and those that do not
dirs_containing_elephants = []
dirs_not_containing_elephants = []
for f in os.listdir(data_dir):
    curr_dir = os.path.join(data_dir, f)
    if os.path.isdir(curr_dir):
        if "elephant" in f:
            dirs_containing_elephants.append(curr_dir)
        else:
            dirs_not_containing_elephants.append(curr_dir)

# Rebuild the two-class dataset directory from scratch
elephant_dataset = "elephant_dataset"
elephant_dir = os.path.join(elephant_dataset, "elephant")
not_elephant_dir = os.path.join(elephant_dataset, "not_elephant")

if os.path.isdir(elephant_dataset):
    shutil.rmtree(elephant_dataset)
os.makedirs(elephant_dir)
os.makedirs(not_elephant_dir)

# Copy every image into the corresponding class folder
for d in dirs_containing_elephants:
    for fname in os.listdir(d):
        shutil.copy2(os.path.join(d, fname), elephant_dir)

for d in dirs_not_containing_elephants:
    for fname in os.listdir(d):
        shutil.copy2(os.path.join(d, fname), not_elephant_dir)
Let us recheck our dataset¶
data_dir = 'elephant_dataset/'
view_dataset(data_dir)
Number of samples-
elephant - 12114
not_elephant - 1616
Downloading the dataset created above¶
In [1]:
!wget -N "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/elephant_dataset.zip" -O elephant_dataset.zip
!unzip -qo elephant_dataset.zip
!rm elephant_dataset.zip
Import necessary libraries¶
In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, callbacks, optimizers
from sklearn.metrics import confusion_matrix, f1_score
from tensorflow import keras
import os, shutil
import random
from PIL import Image
In [3]:
data_dir = 'elephant_dataset/'
batch_size = 64
# image_size = (32, 32)
image_size = (28, 28)
print("Training set")
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    color_mode="grayscale",
    image_size=image_size,
    seed=113,
    shuffle=True,
    batch_size=batch_size
)

print("Validation set")
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    color_mode="grayscale",
    image_size=image_size,
    seed=113,
    shuffle=True,
    batch_size=batch_size
)
In [4]:
class_names = train_ds.class_names
print(class_names)
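Note: image_dataset_from_directory assigns integer labels in alphabetical order of the class folder names, so here elephant maps to 0 and not_elephant maps to 1; the augmentation step below relies on this mapping. A quick sanity check (a minimal sketch, not part of the original run):

for images, labels in train_ds.take(1):
    print(images.shape, labels.shape)                    # expected: (64, 28, 28, 1) (64,)
    print([class_names[int(l)] for l in labels[:5]])     # e.g. ['elephant', 'not_elephant', ...]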
In [5]:
# Convert the training tf.data.Dataset into numpy arrays
Xtrain = np.empty((0, *image_size, 1))
ytrain = np.empty((0, 1))

for x in train_ds.enumerate():
    for y in x[1][0]:
        Xtrain = np.append(Xtrain, np.expand_dims(np.array(y), 0), axis=0)
        #print(Xtrain.shape)
    ytrain = np.append(ytrain, np.array(x[1][1]))
    #print(ytrain.shape)

Xtrain.shape, ytrain.shape
Out[5]:
Augmenting images in the train set to increase dataset size¶
In [6]:
# Augmenting images in the train set to increase dataset size
data_augmentation = tf.keras.Sequential(
    [
        layers.experimental.preprocessing.RandomFlip("horizontal"),    # Randomly mirror images left-right
        layers.experimental.preprocessing.RandomZoom(0.1),             # Randomly zoom images in the dataset
    ])

print("Train size (number of samples) before augmentation: ", len(Xtrain))

# Oversample the minority class (not_elephant) until it reaches half the elephant count
aug_sample_count = ytrain.tolist().count(float(0.0))//2 - ytrain.tolist().count(float(1.0))
cur_augmented = 0

# Apply only to the train set
while(cur_augmented != aug_sample_count):
    for i in range(len(Xtrain)):
        if ytrain[i] == 1:    # not_elephant
            aug_image = np.array(data_augmentation(np.expand_dims(Xtrain[i], 0)))
            Xtrain = np.append(Xtrain, aug_image.reshape((1, *image_size, 1)), axis=0)
            ytrain = np.append(ytrain, [1])
            cur_augmented += 1
            if (cur_augmented == aug_sample_count):
                break

print("Size (number of samples) of final dataset: ", len(Xtrain))
print(" Dataset shapes: ", Xtrain.shape, ytrain.shape)

#Adding to train_ds
#train_ds = train_ds.concatenate(aug_ds)
#print("Train size (number of batches) after augmentation: ", len(train_ds))
In [7]:
print("Number of samples - ")
for i in range(len(class_names)):
print(class_names[i], "-", ytrain.tolist().count(float(i)))
In [8]:
# Convert the validation tf.data.Dataset into numpy arrays
Xval = np.empty((0, *image_size, 1))
yval = np.empty((0, 1))

for x in val_ds.enumerate():
    for y in x[1][0]:
        Xval = np.append(Xval, np.expand_dims(np.array(y), 0), axis=0)
        #print(Xval.shape)
    yval = np.append(yval, np.array(x[1][1]))
    #print(yval.shape)

Xval.shape, yval.shape
Out[8]:
In [9]:
print("Number of samples - ")
for i in range(len(class_names)):
print(class_names[i], "-", yval.tolist().count(float(i)))
Visualization¶
In [10]:
num_samples = 4    # the number of samples to be displayed for each class

for x in class_names:
    plt.figure(figsize=(10, 10))
    filenames = os.listdir(data_dir + x)
    for i in range(num_samples):
        ax = plt.subplot(1, num_samples, i + 1)
        img = Image.open(os.path.join(data_dir, x, filenames[i]))
        plt.imshow(img)
        plt.title(x)
        plt.axis("off")
Normalizing the pixel values¶
Pixel values are currently integers in the range [0, 255]. Scaling them to [0, 1] helps the network converge faster.
In [11]:
Xtrain = Xtrain/255
Xval = Xval/255
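If the scaling should live inside the model instead (so raw [0, 255] images can be fed directly at inference time), the same preprocessing API used for augmentation provides a Rescaling layer; a minimal sketch:

# Hypothetical alternative: scale inside the network rather than on the numpy arrays
rescale = layers.experimental.preprocessing.Rescaling(1./255)
# e.g. placed as the first layer of the Sequential model defined below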
The Model¶
In [12]:
model = keras.models.Sequential([
    layers.Conv2D(8, 3, activation='relu', input_shape=Xtrain[0].shape),
    layers.MaxPool2D(pool_size=(2, 2)),

    layers.Conv2D(16, 3, activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2)),

    layers.Conv2D(32, 3, activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2)),

    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

cb = [callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)]
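For 28x28 grayscale inputs, the feature map sizes shrink as follows (valid padding, stride-1 convolutions, 2x2 max pooling), which is why the flattened vector feeding the dense head has only 32 values; model.summary() below confirms these shapes:

# 28 -> Conv 3x3 -> 26 -> MaxPool 2x2 -> 13
#    -> Conv 3x3 -> 11 -> MaxPool 2x2 -> 5
#    -> Conv 3x3 -> 3  -> MaxPool 2x2 -> 1
# Flatten: 1 * 1 * 32 channels = 32 features -> Dense(32) -> Dense(1, sigmoid)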
In [13]:
model.summary()
In [14]:
model.compile(loss=keras.losses.BinaryCrossentropy(), optimizer=optimizers.Adam(0.0001), metrics=['accuracy'])
history = model.fit(Xtrain, ytrain, validation_data=(Xval, yval), epochs=300, callbacks=cb)
In [15]:
model.evaluate(Xval, yval)
Out[15]:
In [16]:
ypred = (model.predict(Xval)>0.5).astype('int')
In [17]:
cm = confusion_matrix(yval, ypred)
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]    # normalize counts to per-class fractions

fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(111)

for i in range(cm.shape[1]):
    for j in range(cm.shape[0]):
        # Use white text on dark cells for readability
        if cm[i, j] > 0.8:
            clr = "white"
        else:
            clr = "black"
        ax.text(j, i, format(cm[i, j], '.2f'), horizontalalignment="center", color=clr)

_ = ax.imshow(cm, cmap=plt.cm.Blues)
ax.set_xticks(range(len(class_names)))
ax.set_yticks(range(len(class_names)))
ax.set_xticklabels(class_names, rotation=90)
ax.set_yticklabels(class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()
In [18]:
f1_score(yval, ypred, average = 'binary')
Out[18]:
Plotting the metrics¶
In [19]:
def plot(history, variable1, variable2):
    plt.plot(range(len(history[variable1])), history[variable1])
    plt.plot(range(len(history[variable2])), history[variable2])
    plt.legend([variable1, variable2])
    plt.title(variable1)
In [20]:
plot(history.history, "accuracy", 'val_accuracy')
In [21]:
plot(history.history, "loss", 'val_loss')
Prediction¶
In [22]:
# pick a random test sample from one batch
x = random.randint(0, batch_size - 1)    # batch_size is 64 here

for i in val_ds.as_numpy_iterator():
    img, label = i
    plt.axis('off')    # remove axes
    plt.imshow(img[x])    # shape (batch_size, 28, 28, 1) --> (28, 28, 1)
    output = model.predict(np.expand_dims(img[x], 0))[0][0]    # model expects a batch: (28, 28, 1) --> (1, 28, 28, 1)
    pred = (output > 0.5).astype('int')
    print("Predicted: ", class_names[pred], '(', output, '-->', pred, ')')    # picking the label from class_names based on the model output
    print("True: ", class_names[label[x]])
    break
deepC¶
In [23]:
model.save('elephant_classification.h5')
!deepCC elephant_classification.h5
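Before handing the .h5 file to the deepCC compiler, it can help to reload it and confirm that the saved weights reproduce the validation metrics (a quick check, not part of the original notebook):

reloaded = keras.models.load_model('elephant_classification.h5')
reloaded.evaluate(Xval, yval)    # should match the evaluation of the in-memory model above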