Lung Cancer Detection from CT scans
Credit: AITS Cainvas Community
Photo by Vladimir Marchukov on Dribbble
In this notebook, we will predict whether a given CT scan image is a benign, malignant or normal case, using the IQ-OTH/NCCD lung cancer dataset. We will employ a Convolutional Neural Network to classify the images into one of the three classes. The dataset contains a total of 1190 images representing CT scan slices of 110 cases. These cases are grouped into three classes: normal, benign, and malignant. Of these, 40 cases are diagnosed as malignant, 15 as benign, and 55 are classified as normal.
Importing the dataset
# Download the dataset archive, unpack it quietly (overwriting any existing
# files), then delete the archive since only the extracted folder is used.
!wget "https://cainvas-static.s3.amazonaws.com/media/user_data/um4ng-tiw0/Lung_cancer_dataset.zip"
!unzip -qo Lung_cancer_dataset.zip
!rm Lung_cancer_dataset.zip
#Importing necessary libraries
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPool2D, BatchNormalization
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
import cv2
Visualizing the dataset
# Load one sample scan to sanity-check the dataset layout.
sample_path = "Lung_cancer_dataset/Train/Malignant cases/Malignant case (10).jpg"
img = cv2.imread(sample_path)
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, so fail here with an explicit message.
    raise FileNotFoundError(f"Could not read sample image: {sample_path}")
# OpenCV decodes images in BGR channel order; matplotlib expects RGB.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.title("Malignant Case")
plt.imshow(img)

# Size every image is resized to before it is fed to the network.
img_width = 64
img_height = 64
Preparing the data
In the following cells we use Keras' ImageDataGenerator to load the images together with their labels for training the neural network.
# Shared generator for the Train/ folder: multiplies every pixel by 1/255 and
# reserves 10% of the images for validation.
datagen = ImageDataGenerator(rescale=1 / 255.0, validation_split=0.1)

# NOTE: Keras documents target_size as (height, width). Both values are 64
# here, and the model's input_shape uses the same (img_width, img_height)
# order, so the two stay consistent with each other.
train_data_generator = datagen.flow_from_directory(
    directory="Lung_cancer_dataset/Train",
    target_size=(img_width, img_height),
    color_mode="grayscale",
    class_mode="categorical",  # one-hot encoded labels
    batch_size=16,
    subset="training",
    shuffle=True,  # a real boolean; the string "True" only worked by being truthy
)
validation_data_generator = datagen.flow_from_directory(
    directory="Lung_cancer_dataset/Train",
    target_size=(img_width, img_height),
    color_mode="grayscale",
    class_mode="categorical",
    batch_size=16,
    subset="validation",
)
The labels
We use one-hot encoding here because the labels are categorical.
# Show the one-hot labels of one training batch. The builtin next() is used
# because the iterator's .next() method is not available on newer Keras
# releases, while next() works on both old and new ones.
next(train_data_generator)[1]
The Model
# CNN classifier: three convolution/pooling stages followed by a small dense
# head that ends in a 3-way softmax (one unit per class).
# Dropout layers (0.2 / 0.3 / 0.4) after the hidden Dense layers are
# intentionally left disabled.
model = Sequential(
    [
        Conv2D(
            filters=32,
            kernel_size=(3, 3),
            activation="relu",
            input_shape=(img_width, img_height, 1),  # single grayscale channel
        ),
        MaxPool2D(pool_size=2, strides=2),
        Conv2D(filters=64, kernel_size=(3, 3), activation="relu"),
        MaxPool2D(pool_size=3, strides=3),
        Conv2D(filters=32, kernel_size=(3, 3), padding="same", activation="relu"),
        MaxPool2D(pool_size=2, strides=2),
        Flatten(),
        Dense(units=32, activation="relu"),
        Dense(units=64, activation="relu"),
        Dense(units=32, activation="relu"),
        Dense(units=3, activation="softmax"),
    ]
)
model.summary()

model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Stop once validation loss has not improved for 10 epochs and roll the model
# back to the weights of its best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
)
my_callback = [early_stopping]

history = model.fit(
    train_data_generator,
    epochs=50,
    steps_per_epoch=len(train_data_generator),
    validation_data=validation_data_generator,
    validation_steps=len(validation_data_generator),
    callbacks=my_callback,
)
Preparing Test Data for prediction
# Dedicated generator for the held-out Test/ folder: rescaling only, with no
# validation split, so every test image is available for evaluation.
datagen_test = ImageDataGenerator(rescale=1 / 255.0)

# Built from datagen_test (not the training `datagen`, whose 10% validation
# split combined with subset="training" silently dropped test images).
test_data_generator = datagen_test.flow_from_directory(
    directory="Lung_cancer_dataset/Test",
    target_size=(img_width, img_height),
    color_mode="grayscale",
    class_mode="categorical",
    batch_size=10,
)

# Show the one-hot labels of one test batch; the builtin next() works across
# Keras versions, unlike the iterator's .next() method.
next(test_data_generator)[1]
Model accuracy and loss trends
Let's visualize the accuracy and loss trends throughout the training process
# Plot the per-epoch training and validation curves for each tracked metric.
# The second curve comes from the validation split used during fit(), so it is
# labelled "validation" rather than "test".
for metric in ("accuracy", "loss"):
    plt.plot(history.history[metric])
    plt.plot(history.history[f"val_{metric}"])
    plt.title(f"model {metric}")
    plt.ylabel(metric)
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()
Visualizing the predictions of the model on unseen data
# Draw one batch of unseen test images (x) with their one-hot labels (y).
# The builtin next() works across Keras versions, unlike .next().
x, y = next(test_data_generator)

# Classify the first five images in a single forward pass. Slicing the batch
# keeps the (N, height, width, 1) layout the model expects, so no reshape to a
# hard-coded 64x64 size is needed.
pred_val = model.predict(x[:5])

# Index of the highest-probability class for each image, as plain ints.
pred_array = np.argmax(pred_val, axis=1).tolist()
# Make the output meaningful by mapping class indices to readable names.
cell_dict = {0: "Benign", 1: "Malignant", 2: "Normal"}

# Ground-truth names for the first five images, keyed by position in the
# batch; the class index is the position of the 1 in each one-hot label row.
actual_val = {
    idx: cell_dict[np.argmax(one_hot)] for idx, one_hot in enumerate(y[:5])
}

# Predicted names for the same images, keyed the same way.
predictions = {idx: cell_dict[pred] for idx, pred in enumerate(pred_array)}

print("ACTUAL:", actual_val)
print("PREDICTIONS:", predictions)
# Show the first five test images side by side, each titled with its true and
# predicted class.
plt.figure(figsize=(20, 20))
for i in range(5):
    plt.subplot(5, 5, i + 1)
    # Drop the trailing channel axis: some matplotlib versions reject
    # (height, width, 1) arrays, while a 2-D array is always accepted.
    plt.imshow(x[i][:, :, 0], cmap="binary")
    plt.title(f"Original: {actual_val[i]}, Predicted: {predictions[i]}")
    plt.axis("Off")
# Spacing tweak carried over unchanged from the original layout.
plt.subplots_adjust(left=1.5, right=2.5, top=1)
plt.show()
# Persist the trained model to disk as lung_cancer_prediction.h5.
model.save("lung_cancer_prediction.h5")
DeepCC
# Run the deepCC command-line tool.
# NOTE(review): the command is invoked without arguments here; presumably it
# should be given the saved model file (lung_cancer_prediction.h5) - confirm.
!deepCC