Cainvas

Tomato Disease Detection

Credit: AITS Cainvas Community

Photo by Jonas Mosesson on Dribbble

Importing necessary libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2 
import os

Downloading the data

In [2]:
# Fetch the dataset archive; -N turns on wget's time-stamping (skip the download if an up-to-date local copy exists)
!wget -N "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/tomato.zip"
# Extract quietly (-q), overwriting any previously extracted files without prompting (-o)
!unzip -qo tomato.zip
# Remove the archive once its contents are extracted
!rm tomato.zip
--2021-09-09 05:39:06--  https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/tomato.zip
Resolving cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)... 52.219.156.31
Connecting to cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)|52.219.156.31|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 37248042 (36M) [application/x-zip-compressed]
Saving to: ‘tomato.zip’

tomato.zip          100%[===================>]  35.52M   110MB/s    in 0.3s    

2021-09-09 05:39:06 (110 MB/s) - ‘tomato.zip’ saved [37248042/37248042]

Reading and preprocessing images and labels

In [3]:
# The dataset root holds two split folders, `train` and `val`. Each split holds one
# folder per class (healthy leaves plus the diseases); the class name is the part of
# the folder name after the '___' separator.
dataset_path = 'tomato'


def _load_split(split_path):
    """Read every jpg image under one split folder.

    Parameters
    ----------
    split_path : str
        Folder that contains one sub-folder per class.

    Returns
    -------
    images : np.ndarray
        Images in RGB channel order with pixel values scaled to [0, 1].
        Stacks into one 4-D array only if all images share the same size
        (assumed for this dataset -- TODO confirm if the data changes).
    labels : np.ndarray
        Class name of each image, aligned with `images`.

    Raises
    ------
    IOError
        If OpenCV cannot decode one of the jpg files.
    """
    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order reproducible across runs and machines.
    for disease_folder in sorted(os.listdir(split_path)):
        disease_path = os.path.join(split_path, disease_folder)
        # Skip stray entries (e.g. .ipynb_checkpoints, .DS_Store) that are not class folders.
        if not os.path.isdir(disease_path) or '___' not in disease_folder:
            continue
        label = disease_folder.split('___')[1]
        for file in sorted(os.listdir(disease_path)):
            if not file.endswith('jpg'):
                continue
            img_path = os.path.join(disease_path, file)
            img = cv2.imread(img_path)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise IOError('Could not read image: {}'.format(img_path))
            # OpenCV decodes to BGR; convert so the channels really are R, G, B
            # (matplotlib's imshow interprets the last axis as RGB).
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # Normalize 8-bit pixel values to the [0, 1] range.
            images.append(img / 255)
            labels.append(label)
    return np.array(images), np.array(labels)


train_images, train_labels = _load_split(os.path.join(dataset_path, 'train'))
# The `val` folder is used as the test set.
test_images, test_labels = _load_split(os.path.join(dataset_path, 'val'))

print('Shape of the stacked train images:', train_images.shape)
print('Shape of the train labels:', train_labels.shape)
print('Shape of the stacked test images:', test_images.shape)
print('Shape of the test_labels:', test_labels.shape)
Shape of the stacked train images: (10000, 64, 64, 3)
Shape of the train labels: (10000,)
Shape of the stacked test images: (1000, 64, 64, 3)
Shape of the test_labels: (1000,)

Checking for all leaf categories

In [4]:
# Distinct class names present in the training labels (np.unique returns them sorted).
unique_labels = np.unique(train_labels)
# Bare last expression so the notebook displays the array.
unique_labels
Out[4]:
array(['Bacterial_spot', 'Early_blight', 'Late_blight', 'Leaf_Mold',
       'Septoria_leaf_spot', 'Spider_mites Two-spotted_spider_mite',
       'Target_Spot', 'Tomato_Yellow_Leaf_Curl_Virus',
       'Tomato_mosaic_virus', 'healthy'], dtype='<U36')

Encoder function that uses one-hot encoding to convert string labels into numerical vectors

In [5]:
def encoder(labels):
    """One-hot encode string class labels.

    Parameters
    ----------
    labels : array-like of str
        Class names; each must be one of the keys of the mapping below.
        Accepts a NumPy array or any plain sequence (list, tuple).

    Returns
    -------
    np.ndarray
        Float array of shape (len(labels), number_of_classes) with a single 1
        per row, in the column assigned to that row's class.

    Raises
    ------
    KeyError
        If a label is not a known class name.
    """
    class_index = {'Bacterial_spot':0, 'Early_blight':1, 'Late_blight':2, 'Leaf_Mold':3, 'Septoria_leaf_spot':4, 'Spider_mites Two-spotted_spider_mite':5,
                   'Target_Spot':6, 'Tomato_Yellow_Leaf_Curl_Virus':7, 'Tomato_mosaic_virus':8, 'healthy':9}
    # asarray lets callers pass a list as well as an ndarray.
    labels = np.asarray(labels)
    # Width comes from the mapping itself so the two can never drift apart.
    one_hot = np.zeros((labels.shape[0], len(class_index)))
    for row, name in enumerate(labels):
        one_hot[row, class_index[name]] = 1
    return one_hot

Decoder function that will transform predicted results into string labels

In [6]:
def decoder(labels):
    """Turn per-class scores back into string class names.

    `labels` is a 2-D array-like with one row per sample and one column per
    class. For every row the column holding the largest value is looked up in
    the index-to-name table, and the resulting names are returned as a NumPy
    array in row order.
    """
    index_to_name = dict(enumerate([
        'Bacterial_spot',
        'Early_blight',
        'Late_blight',
        'Leaf_Mold',
        'Septoria_leaf_spot',
        'Spider_mites Two-spotted_spider_mite',
        'Target_Spot',
        'Tomato_Yellow_Leaf_Curl_Virus',
        'Tomato_mosaic_virus',
        'healthy',
    ]))
    winning_columns = np.argmax(labels, axis=1)
    return np.array([index_to_name[column] for column in winning_columns])
    

Let's visualize 2 images from each category with their corresponding labels to have an idea about our data

In [7]:
# Show a couple of example images per category, each titled with its label.
# Samples are picked from the labels themselves rather than with a fixed index
# stride, so the cell does not depend on how many images each class has or on
# the order in which the classes were loaded.
samples_per_class = 2
col = 4

sample_indices = []
for class_name in np.unique(train_labels):
    class_positions = np.flatnonzero(train_labels == class_name)
    # Spread the picks evenly through the class (first image, middle image, ...).
    step = max(len(class_positions) // samples_per_class, 1)
    sample_indices.extend(class_positions[::step][:samples_per_class])

# Enough rows to hold every sample; 10 classes x 2 samples gives the 5 x 4 grid.
row = int(np.ceil(len(sample_indices) / col))
# squeeze=False keeps `axes` 2-D even when there is a single row.
fig, axes = plt.subplots(row, col, figsize=(14, 14), squeeze=False)
for position, ax in enumerate(axes.flat):
    if position < len(sample_indices):
        image_index = sample_indices[position]
        ax.imshow(train_images[image_index])
        ax.set_title(train_labels[image_index])
    else:
        # Hide grid cells that have no image to show.
        ax.axis('off')

plt.tight_layout()
plt.show()