Importing necessary libraries¶
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import cv2
import os
Downloading the data¶
In [2]:
# Fetch the tomato-leaf dataset archive; -N tells wget to skip the download when an up-to-date local copy exists.
!wget -N "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/tomato.zip"
# Extract quietly (-q), overwriting existing files without prompting (-o).
!unzip -qo tomato.zip
# Delete the archive once extracted; because it is removed, a re-run downloads it again despite -N.
!rm tomato.zip
Reading and preprocessing images and labels¶
In [3]:
# The dataset folder holds two splits, `train` and `val`; each split contains
# 10 class folders (one for healthy leaves, the others for diseases).
def _load_split(split_path):
    """Read every jpg image below ``split_path`` and return ``(images, labels)``.

    Each sub-folder of ``split_path`` is one class; its label is the second
    '___'-separated field of the folder name. Images are returned as RGB
    float32 arrays scaled to [0, 1].

    Raises FileNotFoundError (from os.listdir) if ``split_path`` does not exist.
    """
    images, labels = [], []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and everything indexed from it later) reproducible between runs.
    for disease_folder in sorted(os.listdir(split_path)):
        disease_path = os.path.join(split_path, disease_folder)
        if not os.path.isdir(disease_path):
            # stray files next to the class folders carry no label
            continue
        label = disease_folder.split('___')[1]
        for file in sorted(os.listdir(disease_path)):
            if not file.endswith('jpg'):
                continue
            img = cv2.imread(os.path.join(disease_path, file))
            if img is None:
                # cv2.imread signals an unreadable file by returning None
                continue
            # OpenCV decodes to BGR; convert so channel 0 really is red and
            # plt.imshow shows the leaves in their true colours.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # float32 is enough for [0, 1] pixels and halves the memory of float64
            images.append(img.astype(np.float32) / 255)
            labels.append(label)
    return np.array(images), np.array(labels)


dataset_path = 'tomato'
train_images, train_labels = _load_split(os.path.join(dataset_path, 'train'))
test_images, test_labels = _load_split(os.path.join(dataset_path, 'val'))
print('Shape of the stacked train images:', train_images.shape)
print('Shape of the train labels:', train_labels.shape)
print('Shape of the stacked test images:', test_images.shape)
print('Shape of the test_labels:', test_labels.shape)
Checking for all leaf categories¶
In [4]:
# Sorted array of the distinct class names found in the training labels.
unique_labels = np.unique(train_labels)
# Bare expression so the notebook shows the array as the cell output.
unique_labels
Out[4]:
Encoder function that uses one-hot encoding to turn string labels into numerical vectors¶
In [5]:
def encoder(labels):
    """One-hot encode string class labels.

    Parameters
    ----------
    labels : sequence of str
        Class names; a NumPy array or a plain list/tuple both work.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(labels), 10)`` with a single 1 per row,
        placed in the column assigned to that row's class.

    Raises
    ------
    KeyError
        If a label is not one of the ten known class names.
    """
    class_index = {'Bacterial_spot': 0, 'Early_blight': 1, 'Late_blight': 2, 'Leaf_Mold': 3,
                   'Septoria_leaf_spot': 4, 'Spider_mites Two-spotted_spider_mite': 5,
                   'Target_Spot': 6, 'Tomato_Yellow_Leaf_Curl_Virus': 7,
                   'Tomato_mosaic_virus': 8, 'healthy': 9}
    # len() instead of .shape so lists are accepted; width follows the class map
    one_hot = np.zeros((len(labels), len(class_index)))
    for row, name in enumerate(labels):
        one_hot[row, class_index[name]] = 1
    return one_hot
Decoder function that will transform predicted results into string labels¶
In [6]:
def decoder(labels):
    """Turn rows of class scores into class-name strings.

    For every row of ``labels`` the column holding the largest value is
    looked up in the class-name table; the names are returned as a NumPy
    array in row order.
    """
    index_to_name = dict(enumerate((
        'Bacterial_spot', 'Early_blight', 'Late_blight', 'Leaf_Mold',
        'Septoria_leaf_spot', 'Spider_mites Two-spotted_spider_mite',
        'Target_Spot', 'Tomato_Yellow_Leaf_Curl_Virus', 'Tomato_mosaic_virus',
        'healthy',
    )))
    winners = np.argmax(labels, axis=1)
    return np.array([index_to_name[k] for k in winners])
Let's visualize 2 images from each category with their corresponding labels to have an idea about our data¶
In [7]:
# Show a row x col grid of training images, each titled with its label.
row = 5
col = 4
fig, axes = plt.subplots(row, col, figsize=(14, 14))
# Step 500 images between panels; if that would run past the end of the
# dataset, fall back to an even spacing that fits.
stride = 500
if (row * col - 1) * stride >= len(train_images):
    stride = max(1, len(train_images) // (row * col))
c = 0
for ax in axes.flat:
    if c < len(train_images):
        ax.imshow(train_images[c])
        ax.set_title(train_labels[c])
    else:
        # fewer images than panels: leave the remaining panels blank
        ax.axis('off')
    c += stride
plt.tight_layout()
plt.show()
Encoding our labels¶
In [8]:
# Encode only while the labels are still 1-D arrays of class names, so that
# re-running this cell does not feed already one-hot rows back into encoder().
if train_labels.ndim == 1:
    train_labels = encoder(train_labels)
if test_labels.ndim == 1:
    test_labels = encoder(test_labels)
Splitting our data into train and validation sets and augmenting train images¶
In [9]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the training images for validation (the library default when
# no size is given); the fixed random_state keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    train_images,
    train_labels,
    test_size=0.25,
    random_state=123,
)
In [10]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augment the training batches with random horizontal and vertical flips.
datagen_train = ImageDataGenerator(
    vertical_flip=True,
    horizontal_flip=True,
)
train_iter = datagen_train.flow(x=X_train, y=y_train, batch_size=64)
Building and training our model¶
In [11]:
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten

# Three conv + max-pool stages (8, 16, 32 filters) followed by a small dense
# head that ends in a 10-way softmax.
model = tf.keras.Sequential()
# the first layer fixes the input size from the training array (everything after the batch axis)
model.add(Conv2D(8, (3, 3), input_shape=X_train.shape[1:], activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2), padding='same'))
for filters in (16, 32):
    model.add(Conv2D(filters, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2), padding='same'))
model.add(Flatten())
model.add(Dense(40, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.summary()
In [12]:
# Training callbacks: stop once validation loss has not improved for 10 epochs
# (restoring the best weights seen), and checkpoint the best model to disk.
cb = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    tf.keras.callbacks.ModelCheckpoint('model_tomato.h5', monitor='val_loss', save_best_only=True),
]
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and is rejected by newer Keras releases.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0005),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
history = model.fit(
    train_iter,
    steps_per_epoch=len(train_iter),
    epochs=120,
    validation_data=(X_val, y_val),
    callbacks=cb,
)
Visualizing our accuracy and loss, and saving the model¶
In [13]:
# Summarize the training history: one figure for accuracy, one for loss.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history[f'val_{metric}'])
    plt.title(f'model {metric}')
    plt.ylabel(metric)
    plt.xlabel('epoch')
    # the val_* curve comes from the validation split passed to fit(), not the test set
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
# NOTE(review): this writes to 'model_tomato.h5', the same path the ModelCheckpoint
# callback in the training cell uses, so the saved best checkpoint is replaced by
# the model's current weights — confirm that is intended.
model.save('model_tomato.h5')
print('Weights saved.')
Testing our model¶
In [14]:
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
acc = model.evaluate(x=test_images, y=test_labels)
In [15]:
# decoding the labels
predicted_labels = decoder(model.predict(test_images))
# Decode the ground truth only while it is still one-hot (2-D); on a re-run it is
# already a 1-D array of class names and decoder() would fail on it.
if test_labels.ndim == 2:
    test_labels = decoder(test_labels)
In [16]:
# Grid of test images, every 80th sample, annotated with predicted and actual class.
row, col = 3, 4
fig, axes = plt.subplots(row, col, figsize=(16, 12))
c = 0
count = 0
for ax in axes.flat:
    predicted, actual = predicted_labels[c], test_labels[c]
    ax.imshow(test_images[c])
    ax.set_title(f'Predicted: {predicted}', fontsize=14)
    ax.set_xlabel(f'Actual: {actual}', fontsize=14)
    # count tallies the displayed samples whose prediction is wrong
    count += int(predicted != actual)
    c += 80
plt.tight_layout()
plt.show()
DeepCC¶
In [17]:
# NOTE(review): presumably compiles the saved Keras model with the external deepCC tool — confirm against the deepCC documentation.
!deepCC model_tomato.h5
In [ ]: