Mineral Classification¶
Credit: AITS Cainvas Community
Photo by Verstiuk Production on Dribbble
A mineral classifier can identify minerals from their photographs alone, without any need for human intervention, and can thus help humans in mineral exploitation.
Importing the Dataset¶
In [1]:
# Fetch the minet.zip dataset archive (-N enables wget's timestamp check).
!wget -N "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/minet.zip"
# Extract quietly (-q) and overwrite existing files without prompting (-o).
!unzip -qo minet.zip
# Delete the archive once it has been extracted.
!rm minet.zip
Importing necessary Libraries¶
In [2]:
from torchvision.datasets import ImageFolder
from torchvision import transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from collections import Counter
import cv2
from keras.layers import Dense, Flatten, AveragePooling2D, Dropout
from keras.models import Model
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
Data Analysis¶
In [3]:
# Folder with the training images that ImageFolder reads below.
root_folder = 'minet/train'

# Mineral names, indexed by the integer label of each sample.
target_label = [
    'biotite',
    'bornite',
    'chrysocolla',
    'malachite',
    'muscovite',
    'pyrite',
    'quartz',
]
In [4]:
# Wrap the training folder in an ImageFolder dataset whose samples are
# converted to tensors on access.
to_tensor = transforms.ToTensor()
dataset = ImageFolder(root=root_folder, transform=to_tensor)
print('Data size: ', len(dataset))
# Class names as discovered by ImageFolder (shown as the cell output).
dataset.classes
Out[4]:
In [5]:
# Preview the first images of the dataset (at most 20) in a 2 x 10 grid.
# Capping at len(dataset) avoids an IndexError on a dataset with fewer than 20 images.
preview_count = min(20, len(dataset))
fig = plt.figure(figsize=(25, 4))
for i in range(preview_count):
    # Named `img` rather than `image` so the `image` module imported from
    # keras.preprocessing is not overwritten.
    img, label = dataset[i]
    ax = fig.add_subplot(2, 10, i + 1, xticks=[], yticks=[])
    # Tensors are C x H x W; imshow wants H x W x C.
    ax.imshow(img.permute(1, 2, 0))
    ax.set_title(target_label[label], color='green')
In [6]:
# Count the number of images for each label.
# ImageFolder stores the class index of every sample in `dataset.targets`, so
# the labels can be tallied without loading and decoding each image.
count = Counter(target_label[class_idx] for class_idx in dataset.targets)

# One row per mineral: its name and how many images it has.
df = pd.DataFrame(list(count.items()), columns=['Mineral', 'count'])
df
Out[6]:
In [7]:
# Bar plot of the per-label image counts, easier to read than the table.
# x and y are given by keyword: recent seaborn releases reject positional
# data arguments to barplot.
sns.barplot(x='Mineral', y='count', data=df)
plt.title('Dataset for each label')
plt.xticks(rotation=30)
plt.grid(axis='y')
In [8]:
# Check the image size across the whole dataset.
# Each sample is a tensor laid out as C x H x W.
height = []
width = []
for i in range(len(dataset)):
    # Named `img` rather than `image` so the `image` module imported from
    # keras.preprocessing is not overwritten; the label is not needed here.
    img, _ = dataset[i]
    height.append(img.size(1))
    width.append(img.size(2))

print(f"maximum_height:{np.max(height)} \tminimum_height:{np.min(height)} \tmean_height:{np.mean(height)}")
print(f"maximum_width:{np.max(width)} \tminimum_width:{np.min(width)} \tmean_width:{np.mean(width)}")
Load the data with Keras' Data Loader¶
In [9]:
data_path = "minet"

# Training generator: pixel values are multiplied by 1/255 and the images are
# randomly augmented (zoom, rotation, horizontal flip).
augmentation = dict(zoom_range=0.2, rotation_range=15, horizontal_flip=True)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)

# Test generator: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)
In [10]:
# Options shared by both generators: 224 x 224 targets, batches of 64 and
# 'categorical' class mode.
flow_options = dict(
    target_size=(224, 224),
    batch_size=64,
    class_mode='categorical',
)

# Training set: read from <data_path>/train, shuffled.
training_set = train_datagen.flow_from_directory(
    data_path + '/train',
    shuffle=True,
    **flow_options,
)

# Test set: read from <data_path>/test, not shuffled.
test_set = test_datagen.flow_from_directory(
    data_path + '/test',
    shuffle=False,
    **flow_options,
)
Model Architecture¶
In [11]:
# VGG16 base without its top layers; every base layer is frozen so only the
# new head defined below is trainable.
base_model = VGG16(include_top=False, input_shape=(224, 224, 3))
for base_layer in base_model.layers:
    base_layer.trainable = False

# New classification head: pooling -> flatten -> 128-unit ReLU layer ->
# dropout -> 7-way softmax output.
head = AveragePooling2D()(base_model.output)
head = Flatten()(head)
head = Dense(128, activation="relu")(head)
head = Dropout(0.5)(head)
head = Dense(7, activation='softmax')(head)

# Final model: VGG16 input through to the new softmax output.
model = Model(inputs=base_model.input, outputs=head)
In [12]:
# Print the layer-by-layer summary of the assembled model.
model.summary()
Model Training¶
In [13]:
# Adam optimizer with learning rate 0.001; categorical cross-entropy loss,
# with accuracy tracked as the metric.
opt = Adam(learning_rate=0.001)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)
In [14]:
# Train for 40 epochs on the training generator; test_set is passed as the
# validation data.
history = model.fit(
    training_set,
    epochs=40,
    validation_data=test_set,
)
Training Plots¶
In [15]:
# Pull the per-epoch curves out of the history returned by model.fit.
metrics = history.history
acc, val_acc = metrics['accuracy'], metrics['val_accuracy']
loss, val_loss = metrics['loss'], metrics['val_loss']

# x-axis for the plots: one point per recorded epoch.
epochs = range(len(acc))
In [16]:
# Training vs. validation accuracy per epoch.
# The training legend label previously read 'Trainin_acc' (typo).
plt.plot(epochs, acc, label='Training_acc', color='blue')
plt.plot(epochs, val_acc, label='Validation_acc', color='red')
plt.legend()
plt.title("Training and Validation Accuracy")
Out[16]:
In [17]:
# Training vs. validation loss per epoch.
loss_curves = (
    (loss, 'Training_loss', 'blue'),
    (val_loss, 'Validation_loss', 'red'),
)
for curve, curve_label, curve_color in loss_curves:
    plt.plot(epochs, curve, label=curve_label, color=curve_color)
plt.legend()
plt.title("Training and Validation loss")
Out[17]:
Assessing the performance of the Model¶
In [18]:
# Show the class-name -> index mapping used by the training generator.
training_set.class_indices
Out[18]:
In [19]:
# Map each predicted class index back to its mineral name.
# The map is built by inverting the training generator's own name -> index
# mapping rather than typed out by hand, so it always matches the label order
# the model was trained with.
class_dict = {index: name for name, index in training_set.class_indices.items()}
In [20]:
# Classify a single biotite test image and print the predicted mineral.
file_path = 'minet/test/biotite/0001.jpg'
test_image = cv2.imread(file_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if test_image is None:
    raise FileNotFoundError(file_path)
# Convert from BGR to RGB and resize to the 224 x 224 model input.
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
test_image = cv2.resize(test_image, (224, 224), interpolation=cv2.INTER_CUBIC)
plt.imshow(test_image)
# Scale pixels by 1/255, matching the `rescale` of the training and test
# generators; the model was never fitted on raw 0-255 values.
test_image = np.expand_dims(test_image.astype('float32') / 255.0, axis=0)
probs = model.predict(test_image)
# Highest-probability class index, translated to its mineral name.
pred_class = np.argmax(probs)
pred_class = class_dict[pred_class]
print(pred_class)
In [21]:
# Classify a single bornite test image and print the predicted mineral.
file_path = 'minet/test/bornite/0010.jpg'
test_image = cv2.imread(file_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if test_image is None:
    raise FileNotFoundError(file_path)
# Convert from BGR to RGB and resize to the 224 x 224 model input.
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
test_image = cv2.resize(test_image, (224, 224), interpolation=cv2.INTER_CUBIC)
plt.imshow(test_image)
# Scale pixels by 1/255, matching the `rescale` of the training and test
# generators; the model was never fitted on raw 0-255 values.
test_image = np.expand_dims(test_image.astype('float32') / 255.0, axis=0)
probs = model.predict(test_image)
# Highest-probability class index, translated to its mineral name.
pred_class = np.argmax(probs)
pred_class = class_dict[pred_class]
print(pred_class)
In [22]:
# Classify a single malachite test image and print the predicted mineral.
file_path = 'minet/test/malachite/0008.jpg'
test_image = cv2.imread(file_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if test_image is None:
    raise FileNotFoundError(file_path)
# Convert from BGR to RGB and resize to the 224 x 224 model input.
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
test_image = cv2.resize(test_image, (224, 224), interpolation=cv2.INTER_CUBIC)
plt.imshow(test_image)
# Scale pixels by 1/255, matching the `rescale` of the training and test
# generators; the model was never fitted on raw 0-255 values.
test_image = np.expand_dims(test_image.astype('float32') / 255.0, axis=0)
probs = model.predict(test_image)
# Highest-probability class index, translated to its mineral name.
pred_class = np.argmax(probs)
pred_class = class_dict[pred_class]
print(pred_class)
Saving the Model and Compiling the model with DeepC Compiler¶
In [23]:
# Save the trained model to an HDF5 file in the working directory.
model.save("mineral_classification.h5")
In [1]:
# Run the deepC compiler (deepCC) on the saved model file.
!deepCC mineral_classification.h5