NOTE: This use case is not intended for resource-constrained devices.
There are a variety of insects around us that bite. The effects of a bite can range from mild itching to venom deposition, so identifying the type of insect is necessary to determine the best possible treatment.
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import random
import os
from PIL import Image
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow.keras
The dataset¶
In [2]:
# Download the dataset archive (-N: only if newer than any local copy),
# extract it quietly while overwriting existing files, then delete the archive.
!wget -N "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/insect_bite.zip"
!unzip -qo insect_bite.zip
!rm insect_bite.zip
In [3]:
# Root folder of the extracted dataset.
data_dir = 'insect bite'

print("Number of samples in - ")

# Walk each top-level split folder (train/validation/test) and report how
# many image files every class sub-folder contains.
for split in os.listdir(data_dir + '/'):
    split_path = data_dir + '/' + split
    if os.path.isdir(split_path):
        print('\n' + split.upper())
        for class_name in os.listdir(split_path + '/'):
            class_path = split_path + '/' + class_name + '/'
            print(class_name, " : ", len(os.listdir(class_path)))
In [4]:
batch = 8  # number of images per batch

# Build the train, validation and test datasets from their directories.
splits = {}
for split_name in ('train', 'validation', 'test'):
    print(split_name.capitalize() + " dataset")
    splits[split_name] = tf.keras.preprocessing.image_dataset_from_directory(
        data_dir + '/' + split_name, batch_size=batch)

train_ds = splits['train']
val_ds = splits['validation']
test_ds = splits['test']
In [5]:
# Looking into the class names
# (inferred by Keras from the sub-directory names, in alphabetical order)
class_names = train_ds.class_names
print(class_names)
Visualization¶
In [6]:
num_samples = 3  # how many sample images to display for each class

# Show a small gallery of training images, one row of subplots per class.
for label in class_names:
    plt.figure(figsize=(10, 10))
    class_dir = data_dir + '/train/' + label
    filenames = os.listdir(class_dir)
    for idx in range(num_samples):
        ax = plt.subplot(1, num_samples, idx + 1)
        sample = Image.open(class_dir + '/' + filenames[idx])
        plt.imshow(sample)
        plt.title(label)
        plt.axis("off")
Preprocessing¶
In [7]:
# Inspect one batch to learn the per-image input shape for the model.
print("Looking into the shape of images and labels in one batch\n")

image_batch, labels_batch = next(iter(train_ds))
input_shape = image_batch[0].shape  # shape of a single image
print("Shape of images input for one batch: ", image_batch.shape)
print("Shape of images labels for one batch: ", labels_batch.shape)
In [8]:
# Normalizing the pixel values from [0, 255] down to [0, 1].
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)

def normalize(images, labels):
    """Rescale the image batch; labels pass through unchanged."""
    return normalization_layer(images), labels

train_ds = train_ds.map(normalize)
val_ds = val_ds.map(normalize)
test_ds = test_ds.map(normalize)
In [9]:
# Augmenting images in the train set to increase the effective dataset size.
data_augmentation = tf.keras.Sequential([
    # Flip along both axes
    layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),
    # Randomly zoom images in dataset
    layers.experimental.preprocessing.RandomZoom(0.1),
    # Randomly rotate by up to +/-0.1 of a full turn
    layers.experimental.preprocessing.RandomRotation((-0.1, 0.1)),
])

print("Train size (number of batches) before augmentation: ", len(train_ds))

# Apply the augmentation pipeline only to the training set.
aug_ds = train_ds.map(
    lambda images, labels: (data_augmentation(images, training=True), labels))
print("Size (number of batches) of augmented dataset: ", len(aug_ds))

# Append the augmented batches to train_ds.
train_ds = train_ds.concatenate(aug_ds)
print("Train size (number of batches) after augmentation: ", len(train_ds))
The model¶
In [10]:
# Transfer learning: VGG16 pretrained on ImageNet as a frozen feature
# extractor, with a new classification head trained on top.
base_model = tf.keras.applications.VGG16(
    weights='imagenet',
    input_shape=input_shape,
    include_top=False,  # do not include VGG16's own classification layer
)
base_model.trainable = False  # freeze the pretrained weights

inputs = tf.keras.Input(shape=input_shape)
x = base_model(inputs, training=False)  # run the frozen base in inference mode
x = tf.keras.layers.GlobalAveragePooling2D()(x)
# Add own classification layer: one softmax unit per class.
outputs = tf.keras.layers.Dense(len(class_names), activation='softmax')(x)
model = tf.keras.Model(inputs, outputs)

# Stop training once val_loss stalls for 5 epochs; keep the best weights.
cb = [EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)]

model.summary()
In [11]:
# Phase 1: train the new head with a relatively high learning rate.
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss=SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)
history1 = model.fit(train_ds, validation_data=val_ds, epochs=32, callbacks=cb)
In [12]:
# Phase 2: continue training with a 10x lower learning rate to fine-tune.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)
history2 = model.fit(train_ds, validation_data=val_ds, epochs=32, callbacks=cb)
In [13]:
# Final evaluation on the held-out test set (reports loss and accuracy,
# the metrics configured in compile()).
model.evaluate(test_ds)
Out[13]:
Plotting the metrics¶
In [14]:
def combine_histories(history1, history2, key):
    """Return the metric values for `key` from two Keras history dicts,
    concatenated in order, WITHOUT mutating either input."""
    return list(history1[key]) + list(history2[key])

def plot(history1, history2, variable1, variable2):
    """Plot two metrics (e.g. train vs. validation) across both training runs.

    Parameters:
        history1, history2: `History.history` dicts from the two fit() calls.
        variable1, variable2: metric keys to plot (e.g. "loss", "val_loss").

    Bug fix: the original extended history1's lists in place, so calling
    this function twice duplicated the plotted data; inputs are now copied.
    """
    var1_history = combine_histories(history1, history2, variable1)
    var2_history = combine_histories(history1, history2, variable2)

    # plotting both series against the epoch index
    plt.plot(range(len(var1_history)), var1_history)
    plt.plot(range(len(var2_history)), var2_history)
    plt.legend([variable1, variable2])
    plt.title(variable1)
In [15]:
# Train vs. validation accuracy across both training phases.
plot(history1.history, history2.history, "accuracy", 'val_accuracy')
In [16]:
# Train vs. validation loss across both training phases.
plot(history1.history, history2.history, "loss", 'val_loss')
Prediction¶
In [17]:
# Prediction for all samples in the test dataset.
for image_batch, label_batch in test_ds.as_numpy_iterator():
    # Fix: create one figure PER batch — the original created a single
    # figure before the loop, so every batch overwrote the previous one.
    plt.figure(figsize=(20, 20))
    for idx in range(len(label_batch)):
        ax = plt.subplot(1, len(label_batch), idx + 1)
        plt.axis('off')  # remove axes
        plt.imshow(image_batch[idx])  # single image from the batch
        # The model expects a batch dimension: (H, W, C) -> (1, H, W, C).
        output = model.predict(np.expand_dims(image_batch[idx], 0))
        pred = np.argmax(output[0])  # index of the most probable class
        # Fix: corrected the "Prdicted" typo in the displayed title.
        title = "Predicted: " + class_names[pred]
        title = title + "\nTrue: " + class_names[label_batch[idx]]
        title = title + "\nProbability: " + str(output[0][pred])
        plt.title(title)
The low accuracy is due to the very small number of samples in the training set. Even after augmentation, the dataset is too small to achieve strong results. This notebook is a proof of concept of how neural networks can be used to differentiate between insect bites.
deepC¶
In [ ]:
# Save the trained model in HDF5 format, then compile it with deepC
# for deployment.
model.save('insect.h5')
!deepCC insect.h5