NOTE: This use case is not intended for resource-constrained devices.
Weather Classification¶
Credit: AITS Cainvas Community
Photo by Sergey Galtsev on Dribbble
Image tagging helps in selecting images based on content, especially useful in search engines and other similar applications. Here, we tag images based on the weather of the scene. There are two classes - cloudy, sunny.
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from tensorflow.keras import layers, optimizers, models, preprocessing, losses, callbacks
import os
import random
from PIL import Image
import tensorflow as tf
import tensorflow.keras
In [2]:
# Download the dataset archive
!wget https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/weather.zip
# Extract it quietly (-q), overwriting existing files without prompting (-o)
!unzip -qo weather.zip
# Remove the archive once it has been extracted
!rm weather.zip
In [3]:
# Loading the dataset
path = 'weather/'
input_shape = (256, 256, 3)  # (height, width, channels) expected by the model
batch = 64

# Resize images on load to the model's spatial size. Passing image_size
# explicitly keeps the loader and the model input in agreement if input_shape
# is ever changed, instead of silently relying on the loader's default size.
image_size = input_shape[:2]

# The train and test datasets
print("Train dataset")
train_ds = preprocessing.image_dataset_from_directory(
    path + 'train',
    batch_size=batch,
    image_size=image_size,
    label_mode='binary',
)
print("Test dataset")
test_ds = preprocessing.image_dataset_from_directory(
    path + 'test',
    batch_size=batch,
    image_size=image_size,
    label_mode='binary',
)
In [4]:
# Print, for each split, the number of files found in every class folder
for split in ('train', 'test'):
    print('\n', split.upper())
    split_dir = path + split
    for class_dir in os.listdir(split_dir):
        n_files = len(os.listdir(split_dir + '/' + class_dir))
        print(class_dir, ' - ', n_files)
The train set is balanced while the test set is imbalanced. A confusion matrix can help in finding the per-class accuracies.
In [5]:
# Class labels as reported by each dataset object; kept in class_names for
# turning numeric labels back into readable names later on
class_names = train_ds.class_names
print("Train class names: ", class_names)
print("Test class names: ", test_ds.class_names)
Visualization¶
In [6]:
num_samples = 4  # the number of samples to be displayed in each class

# One figure per class, showing the first num_samples training images in a row
for class_name in class_names:
    class_dir = path + 'train/' + class_name
    filenames = os.listdir(class_dir)
    plt.figure(figsize=(20, 20))
    for position in range(1, num_samples + 1):
        ax = plt.subplot(1, num_samples, position)
        img = Image.open(class_dir + '/' + filenames[position - 1])
        plt.imshow(img)
        plt.title(class_name)
        plt.axis("off")
Feel free to load more images and see the various images in the dataset.
It is important to note that the differences in the images are not very contrasting. In some cases, even humans may find it difficult to categorise them with high confidence.
Preprocessing¶
Normalization¶
In [7]:
# Rescale pixel values by 1/255 (normalization for faster convergence)
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)


def _rescale(images, labels):
    """Apply the rescaling layer to the images; labels pass through unchanged."""
    return normalization_layer(images), labels


train_ds = train_ds.map(_rescale)
test_ds = test_ds.map(_rescale)
The model¶
In [8]:
# DenseNet121 backbone with ImageNet weights.
# include_top=False: do not include the classification layer of the model
base_model = tf.keras.applications.DenseNet121(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)
base_model.trainable = False  # keep the backbone weights frozen

inputs = tf.keras.Input(shape=input_shape)
features = base_model(inputs, training=False)
pooled = layers.GlobalAveragePooling2D()(features)
outputs = layers.Dense(1, activation='sigmoid')(pooled)  # Add own classification layer
model = models.Model(inputs, outputs)

# Stop when val_loss has not improved for 5 epochs and restore the best weights
cb = [
    callbacks.EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
    )
]

model.summary()
In [9]:
# Binary cross-entropy matches the single sigmoid output unit
model.compile(
    optimizer=optimizers.Adam(0.01),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# NOTE: the test set doubles as validation data here, so the early-stopping
# callback in cb selects weights based on test-set loss.
history = model.fit(
    train_ds,
    epochs=32,
    validation_data=test_ds,
    callbacks=cb,
)
In [10]:
# Evaluate on the test set; with the compile settings above this reports loss and accuracy
model.evaluate(test_ds)
Out[10]:
In [11]:
# Collect ground-truth and predicted labels batch by batch. Both come from the
# same batch within a single pass over the dataset, so they stay aligned.
true_labels, predicted_labels = [], []
for images, labels in test_ds.as_numpy_iterator():
    true_labels.extend(labels.flatten().astype('int'))
    output = (model.predict(images) > 0.5).astype('int')  # threshold the sigmoid output at 0.5
    predicted_labels.extend(output.flatten())

cm = confusion_matrix(true_labels, predicted_labels)
# Row-normalise so every row (true class) sums to 1
cm = cm.astype('int') / cm.sum(axis=1)[:, np.newaxis]

# Annotate each cell: i walks the rows (y axis), j walks the columns (x axis).
# Row bounds come from shape[0] and column bounds from shape[1].
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, format(cm[i, j], '.2f'), horizontalalignment="center", color="black")

# confusion_matrix puts true labels on rows and predictions on columns
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.imshow(cm, cmap=plt.cm.Blues)
Out[11]:
Plotting the metrics¶
In [12]:
def plot(history, variable, variable2):
    """Draw two recorded series from ``history`` on the current figure.

    Args:
        history: Mapping from series name to a sequence of values.
        variable: Key of the first series; also used as the plot title.
        variable2: Key of the second series.

    Raises:
        KeyError: If either key is missing from ``history``.
    """
    for name in (variable, variable2):
        values = history[name]
        # Label each curve so the legend can tell the two series apart
        plt.plot(range(len(values)), values, label=name)
    plt.title(variable)
    plt.legend()
In [13]:
# Accuracy on the training data vs. the validation data
plot(history.history, variable="accuracy", variable2="val_accuracy")
In [14]:
# Loss on the training data vs. the validation data
plot(history.history, variable="loss", variable2="val_loss")
Prediction¶
In [15]:
# Pick a random test sample from the first batch yielded by the test dataset
for img, label in test_ds.as_numpy_iterator():
    # Draw the index from the actual batch length: a batch can hold fewer than
    # `batch` images, so an index drawn from [0, batch - 1] could overrun it.
    x = random.randint(0, len(img) - 1)
    sample = img[x]  # shape from (n, 256, 256, 3) --> (256, 256, 3)

    plt.axis('off')  # remove axes
    plt.imshow(sample)

    # getting output; input shape (256, 256, 3) --> (1, 256, 256, 3)
    output = model.predict(np.expand_dims(sample, 0))[0][0]
    pred = (output > 0.5).astype('int')

    # Picking the label from class_names based on the model output
    print("Predicted: ", class_names[pred], '(', output, '-->', pred, ')')
    print("True: ", class_names[label[x][0].astype('int')])
    break  # only the first batch is needed
deepC¶
In [ ]:
# Save the trained model to an HDF5 file
model.save('weather.h5')
# Run the deepCC command-line tool on the saved model file
!deepCC weather.h5