NOTE: This use case is not intended for resource-constrained devices.
Classifying diseases in rice leaves¶
Credit: AITS Cainvas Community
Photo by Dafne, Michael Mazourek on Dribbble
Recognizing three types of rice leaf disease: Bacterial leaf blight, Brown spot, and Leaf smut.
Classifying plant species or diseases in plants can sometimes be a challenging task for the human eye, especially for people with limited experience in the field. There is little margin for error when it comes to crops and farming, as our very livelihoods depend on them. Thus, we use a deep learning approach to try and minimise this error.
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
import os
import random
Dataset¶
The dataset has 3 folders, each with 40 images of a specific disease type.
In [2]:
!wget -N https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/rice_leaf_diseases.zip
!unzip -qo rice_leaf_diseases.zip
!rm rice_leaf_diseases.zip
In [3]:
data_dir = 'rice_leaf_diseases'
print("Number of samples")
for f in os.listdir(data_dir + '/'):
    if os.path.isdir(data_dir + '/' + f):
        print(f, " : ", len(os.listdir(data_dir + '/' + f + '/')))
It is a balanced dataset.
In [4]:
# Splitting into train and validation datasets - 80-20 split
batch_size = 16
print("Training set")
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, validation_split=0.2, subset="training", seed=113, batch_size=batch_size)
print("Validation set")
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, validation_split=0.2, subset="validation", seed=113, batch_size=batch_size)
In [5]:
# Looking into the class names
class_names = train_ds.class_names
print(class_names)
Visualization¶
In [6]:
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(class_names[labels[i]])
        plt.axis("off")
In [7]:
print("Shape of one training batch")
for image_batch, labels_batch in train_ds:
    print("Input: ", image_batch.shape)
    print("Labels: ", labels_batch.shape)
    break
In [8]:
# Cache, shuffle, and pre-fetch images in memory to speed up training
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
Preprocessing the dataset¶
In [9]:
# Normalizing the pixel values - applied to both the train and validation sets
normalization_layer = tf.keras.Sequential([
    layers.experimental.preprocessing.Rescaling(1./255)
])
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))
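As a quick sanity check (a minimal sketch, not part of the original notebook), the pixel values of a mapped batch should now lie within [0, 1]:
# Sanity check: after Rescaling(1./255), pixel values should fall in [0, 1]
images, _ = next(iter(train_ds))
print("Pixel range:", float(tf.reduce_min(images)), "to", float(tf.reduce_max(images)))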
In [10]:
# Augmenting images in the train set to increase dataset size
data_augmentation = tf.keras.Sequential([
    layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),  # flip along both axes
    layers.experimental.preprocessing.RandomZoom(0.1),  # randomly zoom images in the dataset
])
print("Train size (number of batches) before augmentation: ", len(train_ds))
# Apply only to the train set
aug_ds = train_ds.map(lambda x, y: (data_augmentation(x, training=True), y))
print("Size (number of batches) of augmented dataset: ", len(aug_ds))
# Adding the augmented batches to train_ds
train_ds = train_ds.concatenate(aug_ds)
print("Train size (number of batches) after augmentation: ", len(train_ds))
Model¶
In [11]:
# Using transfer learning: an Xception backbone pretrained on ImageNet
base_model = tf.keras.applications.Xception(weights='imagenet', input_shape=(256, 256, 3), include_top=False)  # include_top=False drops Xception's own classification layer
base_model.trainable = False  # freeze the pretrained weights

inputs = tf.keras.Input(shape=(256, 256, 3))
x = base_model(inputs, training=False)  # keep the backbone's BatchNorm layers in inference mode
x = tf.keras.layers.GlobalAveragePooling2D()(x)
outputs = tf.keras.layers.Dense(len(class_names), activation='softmax')(x)  # add our own classification layer
model = tf.keras.Model(inputs, outputs)
In [12]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])
history = model.fit(train_ds, validation_data=val_ds, epochs=16)
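Sixteen epochs at a learning rate of 0.01 is an aggressive schedule for such a small dataset. A hedged alternative, not used in the run above, is to stop early on validation loss:
# Sketch: stop when val_loss stops improving and keep the best weights
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
# history = model.fit(train_ds, validation_data=val_ds, epochs=50, callbacks=[early_stop])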
In [13]:
output = model.evaluate(val_ds)
In [14]:
model.summary()
Plotting the metrics¶
In [15]:
def plot(history, variable, variable2):
    plt.plot(range(len(history[variable])), history[variable])
    plt.plot(range(len(history[variable2])), history[variable2])
    plt.legend([variable, variable2])
    plt.title(variable)
In [16]:
plot(history.history, "accuracy", 'val_accuracy')
In [17]:
plot(history.history, "loss", "val_loss")
Prediction¶
In [18]:
# pick a random sample from one validation batch
x = random.randint(0, batch_size - 1)
for i in val_ds.as_numpy_iterator():
    img, label = i
    plt.axis('off')  # remove axes
    plt.imshow(img[x])  # shape (batch_size, 256, 256, 3) --> (256, 256, 3)
    output = model.predict(np.expand_dims(img[x], 0))  # getting output; input shape (256, 256, 3) --> (1, 256, 256, 3)
    pred = np.argmax(output[0])  # index of the highest probability
    print("Predicted: ", class_names[pred])  # picking the label from class_names based on the model output
    print("True: ", class_names[label[x]])
    print("Probability: ", output[0][pred])
    break
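One random sample is a weak check. The sketch below is an addition (pure NumPy, to avoid extra dependencies) that aggregates predictions over the whole validation set into a confusion matrix:
# Sketch: confusion matrix over the full validation set (rows = true class, columns = predicted class)
num_classes = len(class_names)
cm = np.zeros((num_classes, num_classes), dtype=int)
for imgs, labels in val_ds.as_numpy_iterator():
    preds = np.argmax(model.predict(imgs), axis=1)
    for t, p in zip(labels, preds):
        cm[t][p] += 1
print(class_names)
print(cm)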
deepC¶
In [20]:
model.save('rice_leaf_disease.h5')
#!deepCC rice_leaf_disease.h5
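Before handing the HDF5 file to the deepCC compiler, a quick round-trip check (a sketch, not part of the original notebook) confirms the saved model loads and still evaluates:
# Sketch: reload the saved model and re-run evaluation on the validation set
reloaded = tf.keras.models.load_model('rice_leaf_disease.h5')
print(reloaded.evaluate(val_ds))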