NOTE: This use case is not intended for resource-constrained devices.
Classifying diseases in rice leaves¶
Credit: AITS Cainvas Community
Photo by Dafne, Michael Mazourek on Dribbble
Recognizing three types of rice leaf disease: Bacterial leaf blight, Brown spot, and Leaf smut.
Classifying plant species or diseases in plants can sometimes be a challenging task for the human eye, especially for people with limited experience in the field. There is little margin for error when it comes to crops and farming, as our very livelihoods depend on them. Thus, we use a deep learning approach to try and minimise this error.
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
import os
import random
Dataset¶
The dataset has 3 folders, each with 40 images of a specific disease type.
In [2]:
!wget -N https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/rice_leaf_diseases.zip
!unzip -qo rice_leaf_diseases.zip
!rm rice_leaf_diseases.zip
In [3]:
data_dir = 'rice_leaf_diseases'
print("Number of samples")
for f in os.listdir(data_dir + '/'):
    if os.path.isdir(data_dir + '/' + f):
        print(f, " : ", len(os.listdir(data_dir + '/' + f + '/')))
It is a balanced dataset.
In [4]:
# Splitting into train and validation datasets - 80-20 split
batch_size = 16
print("Training set")
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, validation_split=0.2, subset="training", seed=113, batch_size=batch_size)
print("Validation set")
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, validation_split=0.2, subset="validation", seed=113, batch_size=batch_size)
In [5]:
# Looking into the class names
class_names = train_ds.class_names
print(class_names)
Visualization¶
In [6]:
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(class_names[labels[i]])
        plt.axis("off")
In [7]:
print("Shape of one training batch")
for image_batch, labels_batch in train_ds:
    print("Input: ", image_batch.shape)
    print("Labels: ", labels_batch.shape)
    break
In [8]:
# Cache, shuffle, and pre-fetch images in memory to speed up training
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
Preprocessing the dataset¶
In [9]:
# Normalizing the pixel values - applied to both the train and validation sets
normalization_layer = tf.keras.Sequential([
    layers.experimental.preprocessing.Rescaling(1./255)
])
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))
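As a quick sanity check (a minimal sketch, not part of the original notebook), the pixel values of a mapped batch should now lie within [0, 1]:
# Sanity check: after Rescaling(1./255), pixel values should fall in [0, 1]
images, _ = next(iter(train_ds))
print("Pixel range:", float(tf.reduce_min(images)), "to", float(tf.reduce_max(images)))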
In [10]:
# Augmenting images in the train set to increase dataset size
data_augmentation = tf.keras.Sequential([
    layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),  # flip along both axes
    layers.experimental.preprocessing.RandomZoom(0.1),  # randomly zoom images in the dataset
])
print("Train size (number of batches) before augmentation: ", len(train_ds))
# Apply only to the train set
aug_ds = train_ds.map(lambda x, y: (data_augmentation(x, training=True), y))
print("Size (number of batches) of augmented dataset: ", len(aug_ds))
# Adding the augmented batches to train_ds
train_ds = train_ds.concatenate(aug_ds)
print("Train size (number of batches) after augmentation: ", len(train_ds))
Model¶
In [11]:
# Using transfer learning: an Xception backbone pretrained on ImageNet
base_model = tf.keras.applications.Xception(weights='imagenet', input_shape=(256, 256, 3), include_top=False)  # include_top=False drops Xception's own classification layer
base_model.trainable = False  # freeze the pretrained weights

inputs = tf.keras.Input(shape=(256, 256, 3))
x = base_model(inputs, training=False)  # keep the backbone's BatchNorm layers in inference mode
x = tf.keras.layers.GlobalAveragePooling2D()(x)
outputs = tf.keras.layers.Dense(len(class_names), activation='softmax')(x)  # add our own classification layer
model = tf.keras.Model(inputs, outputs)
In [12]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])
history = model.fit(train_ds, validation_data=val_ds, epochs=16)
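Sixteen epochs at a learning rate of 0.01 is an aggressive schedule for such a small dataset. A hedged alternative, not used in the run above, is to stop early on validation loss:
# Sketch: stop when val_loss stops improving and keep the best weights
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
# history = model.fit(train_ds, validation_data=val_ds, epochs=50, callbacks=[early_stop])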
In [13]:
output = model.evaluate(val_ds)
In [14]:
model.summary()
Plotting the metrics¶
In [15]:
def plot(history, variable, variable2):
    plt.plot(range(len(history[variable])), history[variable])
    plt.plot(range(len(history[variable2])), history[variable2])
    plt.legend([variable, variable2])
    plt.title(variable)
In [16]:
plot(history.history, "accuracy", 'val_accuracy')
In [17]:
plot(history.history, "loss", "val_loss")
Prediction¶
In [18]:
# pick a random sample from one validation batch
x = random.randint(0, batch_size - 1)
for i in val_ds.as_numpy_iterator():
    img, label = i
    plt.axis('off')  # remove axes
    plt.imshow(img[x])  # shape (batch_size, 256, 256, 3) --> (256, 256, 3)
    output = model.predict(np.expand_dims(img[x], 0))  # getting output; input shape (256, 256, 3) --> (1, 256, 256, 3)
    pred = np.argmax(output[0])  # index of the highest probability
    print("Predicted: ", class_names[pred])  # picking the label from class_names based on the model output
    print("True: ", class_names[label[x]])
    print("Probability: ", output[0][pred])
    break
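One random sample is a weak check. The sketch below is an addition (pure NumPy, to avoid extra dependencies) that aggregates predictions over the whole validation set into a confusion matrix:
# Sketch: confusion matrix over the full validation set (rows = true class, columns = predicted class)
num_classes = len(class_names)
cm = np.zeros((num_classes, num_classes), dtype=int)
for imgs, labels in val_ds.as_numpy_iterator():
    preds = np.argmax(model.predict(imgs), axis=1)
    for t, p in zip(labels, preds):
        cm[t][p] += 1
print(class_names)
print(cm)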
deepC¶
In [20]:
model.save('rice_leaf_disease.h5')
#!deepCC rice_leaf_disease.h5
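Before handing the HDF5 file to the deepCC compiler, a quick round-trip check (a sketch, not part of the original notebook) confirms the saved model loads and still evaluates:
# Sketch: reload the saved model and re-run evaluation on the validation set
reloaded = tf.keras.models.load_model('rice_leaf_disease.h5')
print(reloaded.evaluate(val_ds))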