Assessing the grade and quality of fruit¶
Credit: AITS Cainvas Community
Fruits arrive in bulk at processing industries (fruit juice, jam, and other fruit-based products) and vary in quality from fresh to almost rotten.
It is important to categorise them by quality so that low-grade fruit does not affect the taste and quality of the final manufactured product.
For example, a rotten orange can spoil the taste of the entire juice batch.
Here we have 1080 images of pomegranates divided into 12 categories based on grade and quality, with 90 images in each.
import os
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from PIL import Image
Dataset¶
Fruits of 3 grades (G1, G2, G3) are collected. Once collected, the pomegranates are imaged every alternate day over a duration of eight days, yielding four qualities (Q1, Q2, Q3, Q4) for each grade. Since the process is repeated for all three grades, this results in a total of 12 classes of effective quality criteria, with four qualities within each grade.
The dataset folder has 12 subfolders, each corresponding to one of the 12 classes. Each of these subfolders has 90 images.
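Each class label thus combines a grade with a quality. Just to illustrate the structure (the actual class names are read from the dataset folders later, so this "G1Q1" naming pattern is only an assumption), the 12 combinations can be enumerated as:
# Illustration only: the 12 classes as the grade x quality cross product.
# The real class names come from the dataset folders, not this pattern.
grades = ['G1', 'G2', 'G3']
qualities = ['Q1', 'Q2', 'Q3', 'Q4']
print([g + q for g in grades for q in qualities])  # 12 combinations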
Citation
[1] Kumar R, A., Rajpurohit, V. S., & Bidari, K. Y. (2019). Multi Class Grading and Quality Assessment of Pomegranate Fruits Based on Physical and Visual Parameters. International Journal of Fruit Science, 19(4), 372–396.
[2] Kumar R, A., Rajpurohit, V. S., & Jirage, B. J. (2018). Pomegranate Fruit Quality Assessment Using Machine Intelligence and Wavelet Features. Journal of Horticultural Research, 26(1), 53–60. doi:10.2478/johr-2018-0006
!wget https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/Pomegranate.zip
!unzip -qo Pomegranate.zip
!rm Pomegranate.zip
data_dir = 'Pomegranate'
print("Number of samples")
for f in os.listdir(data_dir + '/'):
    if os.path.isdir(data_dir + '/' + f):
        print(f, " : ", len(os.listdir(data_dir + '/' + f + '/')))
# Splitting into train and validation sets - 80-20 split.
batch_size = 16
print("Training set")
train_ds = tf.keras.preprocessing.image_dataset_from_directory(data_dir, validation_split=0.2, subset="training", seed=113, batch_size=batch_size)
print("Validation set")
val_ds = tf.keras.preprocessing.image_dataset_from_directory(data_dir, validation_split=0.2, subset="validation", seed=113, batch_size=batch_size)
# Looking into the class names
class_names = train_ds.class_names
print(class_names)
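By default, image_dataset_from_directory resizes every image to image_size=(256, 256), which is why the model below uses an input shape of (256, 256, 3). A quick look at one batch confirms this:
# Inspect one batch to confirm the loader's output shapes
for images, labels in train_ds.take(1):
    print("Image batch shape:", images.shape)  # (16, 256, 256, 3)
    print("Label batch shape:", labels.shape)  # (16,)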
Visualisation¶
num_samples = 4  # the number of samples to be displayed for each class

for x in class_names:
    plt.figure(figsize=(10, 10))
    filenames = os.listdir(data_dir + '/' + x)
    for i in range(num_samples):
        j = i
        if filenames[i][-4:] == 'xlsx':  # skip spreadsheet files mixed in with the images by jumping ahead
            j = i + num_samples
        ax = plt.subplot(1, num_samples, i + 1)
        img = Image.open(data_dir + '/' + x + '/' + filenames[j])
        plt.imshow(img)
        plt.title(x)
        plt.axis("off")
Preprocessing¶
# Normalizing the pixel values - applied to both train and validation sets
normalization_layer = tf.keras.Sequential([
    layers.experimental.preprocessing.Rescaling(1./255)
])
train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
val_ds = val_ds.map(lambda x, y: (normalization_layer(x), y))
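As a quick sanity check, the pixel values should now lie in [0, 1] rather than [0, 255]:
# Verify the rescaling on one batch
for images, _ in train_ds.take(1):
    print("Min:", np.min(images), " Max:", np.max(images))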
Model architecture and training¶
# Using transfer learning
base_model = tf.keras.applications.VGG16(weights='imagenet', input_shape=(256, 256, 3), include_top=False)  # include_top=False: exclude VGG16's own classification layers
base_model.trainable = False  # freeze the convolutional base
inputs = tf.keras.Input(shape=(256, 256, 3))
x = base_model(inputs, training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
outputs = tf.keras.layers.Dense(len(class_names), activation='softmax')(x)  # add our own classification layer
model = tf.keras.Model(inputs, outputs)
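With the VGG16 base frozen, only the new Dense head should contribute trainable weights; this can be verified before training:
# Only the Dense layer's kernel and bias should be trainable
print("Trainable weight tensors:", len(model.trainable_weights))          # 2 (Dense kernel + bias)
print("Non-trainable weight tensors:", len(model.non_trainable_weights))  # all VGG16 weights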
# training with a learning rate of 0.1
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.1), loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])
history1 = model.fit(train_ds, validation_data=val_ds, epochs=16)
model.summary()
# training with a learning rate of 0.01
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss=tf.keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])
history2 = model.fit(train_ds, validation_data=val_ds, epochs=16)
output = model.evaluate(val_ds)
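model.evaluate returns the validation loss and accuracy in the order given by model.metrics_names:
# Pair each evaluation result with its metric name
for name, value in zip(model.metrics_names, output):
    print(name, ":", value)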
Plotting the metrics¶
def plot(history1, history2, variable1, variable2):
    # combining metrics from both trainings
    var1_history = history1[variable1]
    var1_history.extend(history2[variable1])
    var2_history = history1[variable2]
    var2_history.extend(history2[variable2])

    # plotting them
    plt.figure()  # start a new figure so consecutive calls do not overlap
    plt.plot(range(len(var1_history)), var1_history)
    plt.plot(range(len(var2_history)), var2_history)
    plt.legend([variable1, variable2])
    plt.title(variable1)
    plt.show()
plot(history1.history, history2.history, "accuracy", 'val_accuracy')
plot(history1.history, history2.history, "loss", 'val_loss')
Prediction¶
model.save('pomegranate.h5')
model = tf.keras.models.load_model('pomegranate.h5')
# pick a random test sample from one batch
x = random.randint(0, batch_size - 1)

for i in val_ds.as_numpy_iterator():
    img, label = i  # one batch of images and labels
    plt.axis('off')  # remove axes
    plt.imshow(img[x])  # shape (16, 256, 256, 3) --> (256, 256, 3)
    output = model.predict(np.expand_dims(img[x], 0))  # getting output; input shape (256, 256, 3) --> (1, 256, 256, 3)
    pred = np.argmax(output[0])  # index of the highest probability
    print("Predicted: ", class_names[pred])  # picking the label from class_names based on the model output
    print("True: ", class_names[label[x]])
    print("Probability: ", output[0][pred])
    break
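To look beyond a single random sample, the same predictions can be aggregated over the whole validation set; a minimal sketch, assuming only the model, val_ds and class_names defined above:
# Per-class correct/total counts over the entire validation set
correct = np.zeros(len(class_names), dtype=int)
total = np.zeros(len(class_names), dtype=int)
for img, label in val_ds.as_numpy_iterator():
    preds = np.argmax(model.predict(img), axis=1)
    for p, t in zip(preds, label):
        total[t] += 1
        correct[t] += int(p == t)
for name, c, t in zip(class_names, correct, total):
    print(name, ":", c, "/", t)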
deepC¶
#!deepC pomegranate.h5
# pick a random test sample from one batch
x = random.randint(0, batch_size - 1)

for i in val_ds.as_numpy_iterator():
    img, label = i  # one batch of images and labels
    np.savetxt('sample.data', (img[x]).flatten())  # write the xth sample into a text file
    plt.axis('off')
    plt.imshow(img[x])
    print("True: ", class_names[label[x]])
    break
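sample.data stores the 256x256x3 image as a flat column of floats, which is what the compiled model binary is run with below. Reloading and reshaping it is a quick way to confirm the file was written correctly:
# Sanity check: reload the flattened sample and restore its shape
sample = np.loadtxt('sample.data').reshape(256, 256, 3)
print(sample.shape)  # (256, 256, 3)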
# run exe with input
!pomegranate_deepC/pomegranate.exe sample.data