Brain Tumor Detection¶

Photo by UC San Francisco (UCSF) on Medium

We are all aware of the severity of cancer. It is estimated that nearly 18,000 adults die of brain tumors, and survival statistics tell us that when a tumor is detected late, the patient typically dies within five years. It is therefore necessary to devise techniques for the early detection of brain tumors, and in today's modern world we have the power of AI to help us with the early diagnosis of these tumors.

Importing the Dataset¶

# Fetch the dataset archive, unpack it in place (-q: quiet, -o: overwrite
# existing files without prompting) and remove the archive afterwards.
!wget -N "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/brain_tumor.zip"
!unzip -qo brain_tumor.zip 
!rm brain_tumor.zip

--2020-11-03 13:14:24--  https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/brain_tumor.zip
Resolving cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)... 52.219.64.24
Connecting to cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)|52.219.64.24|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7919366 (7.6M) [application/zip]
Saving to: ‘brain_tumor.zip’

brain_tumor.zip     100%[===================>]   7.55M  --.-KB/s    in 0.06s   

2020-11-03 13:14:24 (134 MB/s) - ‘brain_tumor.zip’ saved [7919366/7919366]

Importing Libraries¶

import numpy as np 
import pandas as pd 
import os
from os import listdir
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import imutils    

from tensorflow.keras.models import Model,load_model
from tensorflow.keras.layers import Conv2D,Input,ZeroPadding2D,BatchNormalization,Flatten,Activation,Dense,MaxPooling2D
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle 

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout,MaxPool2D
from keras.layers import Conv2D, Reshape

from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D

# Root directory of the extracted dataset; the code below reads its 'yes' and
# 'no' sub-folders. The trailing slash matters because paths are built by plain
# string concatenation (e.g. image_dir+'yes').
image_dir="brain_tumor_dataset/"

Since the dataset is small, we will augment the data to increase the dataset size¶

# Create the directories that will store the augmented images.
# os.makedirs also creates the missing parent ('augmented-images'), and
# exist_ok=True keeps this cell re-runnable: without it a second run raises
# FileExistsError because the directories are already there.
os.makedirs('brain_tumor_dataset/augmented-images/yes', exist_ok=True)
os.makedirs('brain_tumor_dataset/augmented-images/no', exist_ok=True)

Function for performing Data Augmentation¶

def augment_data(file_dir, n_generated_samples, save_to_dir):
    """Write randomly augmented copies of every image in ``file_dir`` to disk.

    Each readable image is passed through a Keras ``ImageDataGenerator``
    configured with small rotations, shifts and shear, brightness changes and
    horizontal/vertical flips. The generated variants are saved as JPEG files
    in ``save_to_dir``.

    Args:
        file_dir: Directory containing the source images.
        n_generated_samples: Controls how many variants are written per image.
            NOTE: the loop only stops once its counter *exceeds* this value,
            so ``n_generated_samples + 1`` files are produced per image. This
            is deliberately left unchanged so the size of the augmented
            dataset stays the same.
        save_to_dir: Directory that receives the generated files (the notebook
            creates it beforehand). Files are prefixed with
            ``aug_<source file name without extension>``.
    """
    data_gen = ImageDataGenerator(rotation_range=10,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  shear_range=0.1,
                                  brightness_range=(0.3, 1.0),
                                  horizontal_flip=True,
                                  vertical_flip=True,
                                  fill_mode='nearest'
                                 )

    for filename in listdir(file_dir):
        image = cv2.imread(os.path.join(file_dir, filename))
        if image is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None; skip it instead of crashing on the reshape below.
            continue

        # The generator expects a batch, so add a leading axis of size 1.
        image = image.reshape((1,) + image.shape)

        # splitext copes with any extension length ('.jpg', '.jpeg', ...),
        # whereas slicing off the last four characters does not.
        save_prefix = 'aug_' + os.path.splitext(filename)[0]

        generated = data_gen.flow(x=image, batch_size=1, save_to_dir=save_to_dir,
                                  save_prefix=save_prefix, save_format='jpg')
        # A file is written each time a batch is drawn; the loop has to be
        # ended explicitly with break.
        for i, _ in enumerate(generated, start=1):
            if i > n_generated_samples:
                break

augmented_data_path = 'brain_tumor_dataset/augmented-images/'

# Augment each class folder in turn. The two classes use different sample
# counts (presumably to even out the class sizes - confirm against the data).
for class_folder, samples_per_image in (('yes', 6), ('no', 9)):
    augment_data(
        file_dir=image_dir + class_folder,
        n_generated_samples=samples_per_image,
        save_to_dir=augmented_data_path + class_folder,
    )

Data Preprocessing¶

# Function to crop the image
def crop_brain_contour(image, plot=False):
    
    # Convert the image to grayscale, and blur it slightly
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    
    thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.erode(thresh, None, iterations=2)
    thresh = cv2.dilate(thresh, None, iterations=2)

    # Find contours in thresholded image, then grab the largest one
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    c = max(cnts, key=cv2.contourArea)
    # extreme points
    extLeft = tuple(c[c[:, :, 0].argmin()][0])
    extRight = tuple(c[c[:, :, 0].argmax()][0])
    extTop = tuple(c[c[:, :, 1].argmin()][0])
    extBot = tuple(c[c[:, :, 1].argmax()][0])
    
    # crop new image out of the original image using the four extreme points (left, right, top, bottom)
    new_image = image[extTop[1]:extBot[1], extLeft[0]:extRight[0]]            

    if plot:
        plt.figure()
        plt.subplot(1, 2, 1)
        plt.imshow(image)
        plt.tick_params(axis='both', which='both', top=False, bottom=False, left=False, right=False,labelbottom=False, labeltop=False, labelleft=False, labelright=False)
        plt.title('Original Image')
        plt.subplot(1, 2, 2)
        plt.imshow(new_image)
        plt.tick_params(axis='both', which='both',top=False, bottom=False, left=False, right=False,labelbottom=False, labeltop=False, labelleft=False, labelright=False)
        plt.title('Cropped Image')
        plt.show()
    
    return new_image

After applying the cropping function¶

# Demonstrate the cropping on one sample from the 'yes' folder, with the
# before/after plot switched on.
ex_img = cv2.imread(f"{image_dir}yes/Y107.jpg")
ex_crop_img = crop_brain_contour(ex_img, plot=True)

Loading the Data¶

def load_data(dir_list, image_size):
    """Load, crop, resize and normalise the images of several class folders.

    Args:
        dir_list: Directories to read. Images from a folder named ``yes`` are
            labelled 1; images from any other folder are labelled 0.
        image_size: ``(width, height)`` every image is resized to.

    Returns:
        ``(X, y)`` where ``X`` is a float32 array of images scaled to [0, 1]
        and ``y`` is an array of shape ``(n_examples, 1)`` holding the 0/1
        labels. Both are shuffled in unison.
    """
    X = []
    y = []
    image_width, image_height = image_size

    for directory in dir_list:
        # Compare the final path component rather than the last three
        # characters, so a trailing slash ('.../yes/') does not silently turn
        # every label into 0.
        folder_name = os.path.basename(os.path.normpath(directory))
        label = 1 if folder_name == 'yes' else 0

        for filename in listdir(directory):
            image = cv2.imread(os.path.join(directory, filename))
            if image is None:
                # cv2.imread returns None for unreadable / non-image files.
                continue

            image = crop_brain_contour(image, plot=False)
            image = cv2.resize(image, dsize=(image_width, image_height), interpolation=cv2.INTER_CUBIC)

            # Normalize values to [0, 1]. float32 halves the memory of the
            # default float64 result.
            X.append(image.astype(np.float32) / 255.)
            y.append([label])

    X = np.array(X)
    y = np.array(y)

    # Shuffle the data (images and labels together)
    X, y = shuffle(X, y)

    print(f'Number of examples is: {len(X)}')
    print(f'X shape is: {X.shape}')
    print(f'y shape is: {y.shape}')

    return X, y

# Folders holding the augmented images of each class.
augmented_yes = f"{augmented_data_path}yes"
augmented_no = f"{augmented_data_path}no"

# Every image is resized to this resolution when it is loaded.
IMG_WIDTH = IMG_HEIGHT = 224

X, y = load_data(
    dir_list=[augmented_yes, augmented_no],
    image_size=(IMG_WIDTH, IMG_HEIGHT),
)

Number of examples is: 2059
X shape is: (2059, 224, 224, 3)
y shape is: (2059, 1)

Visualization of data¶

def plot_sample_images(X, y, n=40):
    """Show up to ``n`` example images per class, one figure per label.

    Args:
        X: Array of images indexed by sample along its first axis.
        y: 0/1 labels aligned with ``X``; shape ``(n_samples,)`` or
            ``(n_samples, 1)``.
        n: Maximum number of images to draw for each label.
    """
    labels = np.asarray(y).reshape(-1)
    label_names = {0: "No", 1: "Yes"}

    columns_n = 10
    # Round up so the grid always has room for n images. The previous
    # int(n / columns_n) produced too few rows whenever n was not a multiple
    # of columns_n (zero rows for n < 10), which made plt.subplot raise.
    rows_n = max(1, -(-n // columns_n))

    for label in [0, 1]:
        # Pick the indices of the first n samples of this class and copy only
        # those images.
        sample_indices = np.flatnonzero(labels == label)[:n]
        n_images = X[sample_indices]

        plt.figure(figsize=(10, 8))

        for i, image in enumerate(n_images, start=1):
            plt.subplot(rows_n, columns_n, i)
            plt.imshow(image)

            # remove ticks
            plt.tick_params(axis='both', which='both',
                            top=False, bottom=False, left=False, right=False,
                            labelbottom=False, labeltop=False, labelleft=False, labelright=False)

        plt.suptitle(f"Brain Tumor: {label_names[label]}")
        plt.show()

# Preview up to 40 images (the default n) of each class from the loaded data.
plot_sample_images(X, y)

Preparing Train, Validation and Test Sets¶

def split_data(X, y, test_size=0.2, random_state=None):
    """Split the data into training, validation and test sets.

    ``test_size`` is the *combined* fraction held out from training; that
    held-out part is then divided equally between the validation and the test
    set (so ``test_size=0.2`` yields roughly 80% / 10% / 10%).

    Args:
        X: Samples.
        y: Labels aligned with ``X``.
        test_size: Fraction of the data held out for validation and test
            together.
        random_state: Optional seed forwarded to both ``train_test_split``
            calls so the split can be reproduced. The default ``None`` keeps
            the previous unseeded behaviour.

    Returns:
        ``X_train, y_train, X_val, y_val, X_test, y_test``
    """
    X_train, X_held_out, y_train, y_held_out = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Halve the held-out portion into the test and validation sets.
    X_test, X_val, y_test, y_val = train_test_split(
        X_held_out, y_held_out, test_size=0.5, random_state=random_state)

    return X_train, y_train, X_val, y_val, X_test, y_test

# Hold out 10% of the data, shared equally between validation and test.
X_train, y_train, X_val, y_val, X_test, y_test = split_data(X, y, test_size=0.1)

# Report the size of each split.
for split_name, split_samples in (("training", X_train),
                                  ("validation", X_val),
                                  ("test", X_test)):
    print(f"number of {split_name} examples = {split_samples.shape[0]}")

number of training examples = 1853
number of validation examples = 103
number of test examples = 103

Model Architecture¶

# Small CNN for binary classification: one 7x7 convolution with batch
# normalisation, two 4x4 max-pooling stages, then a single sigmoid unit.
model = Sequential([
    Conv2D(filters=32, kernel_size=7, input_shape=(224, 224, 3), activation='relu'),
    BatchNormalization(axis=3, name='bn0'),
    MaxPooling2D(pool_size=4),
    MaxPooling2D(pool_size=4),
    Flatten(),
    Dense(1, activation='sigmoid'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 218, 218, 32)      4736      
_________________________________________________________________
bn0 (BatchNormalization)     (None, 218, 218, 32)      128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 54, 54, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
flatten (Flatten)            (None, 5408)              0         
_________________________________________________________________
dense (Dense)                (None, 1)                 5409      
=================================================================
Total params: 10,273
Trainable params: 10,209
Non-trainable params: 64
_________________________________________________________________

Model Training¶

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# on both the training and the validation data.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Keep the returned History object: the plotting cell reads its per-epoch
# metrics.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=22,
    batch_size=32,
)

Epoch 1/22
58/58 [==============================] - 72s 1s/step - loss: 0.7975 - accuracy: 0.6147 - val_loss: 0.6731 - val_accuracy: 0.5146
Epoch 2/22
58/58 [==============================] - 72s 1s/step - loss: 0.4817 - accuracy: 0.7777 - val_loss: 0.6451 - val_accuracy: 0.5728
Epoch 3/22
58/58 [==============================] - 72s 1s/step - loss: 0.3934 - accuracy: 0.8284 - val_loss: 0.5891 - val_accuracy: 0.6408
Epoch 4/22
58/58 [==============================] - 72s 1s/step - loss: 0.3467 - accuracy: 0.8484 - val_loss: 0.5850 - val_accuracy: 0.6117
Epoch 5/22
58/58 [==============================] - 72s 1s/step - loss: 0.3081 - accuracy: 0.8689 - val_loss: 0.4661 - val_accuracy: 0.8155
Epoch 6/22
58/58 [==============================] - 72s 1s/step - loss: 0.2623 - accuracy: 0.8975 - val_loss: 0.4357 - val_accuracy: 0.7961
Epoch 7/22
58/58 [==============================] - 75s 1s/step - loss: 0.2409 - accuracy: 0.9077 - val_loss: 0.4585 - val_accuracy: 0.7573
Epoch 8/22
58/58 [==============================] - 73s 1s/step - loss: 0.2224 - accuracy: 0.9083 - val_loss: 0.3259 - val_accuracy: 0.8738
Epoch 9/22
58/58 [==============================] - 72s 1s/step - loss: 0.1900 - accuracy: 0.9293 - val_loss: 0.2713 - val_accuracy: 0.9126
Epoch 10/22
58/58 [==============================] - 73s 1s/step - loss: 0.1671 - accuracy: 0.9417 - val_loss: 0.4229 - val_accuracy: 0.7573
Epoch 11/22
58/58 [==============================] - 72s 1s/step - loss: 0.1487 - accuracy: 0.9514 - val_loss: 0.2494 - val_accuracy: 0.9126
Epoch 12/22
58/58 [==============================] - 72s 1s/step - loss: 0.1399 - accuracy: 0.9525 - val_loss: 0.4154 - val_accuracy: 0.8252
Epoch 13/22
58/58 [==============================] - 72s 1s/step - loss: 0.1193 - accuracy: 0.9638 - val_loss: 0.2867 - val_accuracy: 0.8350
Epoch 14/22
58/58 [==============================] - 72s 1s/step - loss: 0.1023 - accuracy: 0.9736 - val_loss: 0.2576 - val_accuracy: 0.9126
Epoch 15/22
58/58 [==============================] - 75s 1s/step - loss: 0.0996 - accuracy: 0.9719 - val_loss: 0.4993 - val_accuracy: 0.8544
Epoch 16/22
58/58 [==============================] - 72s 1s/step - loss: 0.0929 - accuracy: 0.9709 - val_loss: 0.2696 - val_accuracy: 0.8641
Epoch 17/22
58/58 [==============================] - 76s 1s/step - loss: 0.0764 - accuracy: 0.9800 - val_loss: 0.2180 - val_accuracy: 0.9223
Epoch 18/22
58/58 [==============================] - 72s 1s/step - loss: 0.0666 - accuracy: 0.9860 - val_loss: 0.4028 - val_accuracy: 0.8058
Epoch 19/22
58/58 [==============================] - 72s 1s/step - loss: 0.0579 - accuracy: 0.9843 - val_loss: 0.2159 - val_accuracy: 0.9320
Epoch 20/22
58/58 [==============================] - 72s 1s/step - loss: 0.0466 - accuracy: 0.9914 - val_loss: 0.3056 - val_accuracy: 0.8544
Epoch 21/22
58/58 [==============================] - 73s 1s/step - loss: 0.0435 - accuracy: 0.9881 - val_loss: 0.2673 - val_accuracy: 0.9223
Epoch 22/22
58/58 [==============================] - 72s 1s/step - loss: 0.0343 - accuracy: 0.9935 - val_loss: 0.2516 - val_accuracy: 0.9029

Assessing the Performance of the Model¶

# Score the model on the held-out test set; with the compiled loss and metric
# the result is [loss, accuracy].
model.evaluate(X_test, y_test)

4/4 [==============================] - 1s 138ms/step - loss: 0.4440 - accuracy: 0.8835

[0.4439891278743744, 0.8834951519966125]

Training Plots¶

# Training vs. validation curves for loss and accuracy, stacked in ONE figure.
# (Calling plt.figure() a second time before subplot(212) used to place each
# subplot alone in its own half-empty figure.)
plt.figure(figsize=[14, 10])

curves = (
    # (history key, axis label / title stem, legend entries)
    ('loss', 'Loss', ['Training loss', 'Validation Loss']),
    ('accuracy', 'Accuracy', ['Training Accuracy', 'Validation Accuracy']),
)
for position, (metric, label, legend_entries) in enumerate(curves, start=1):
    plt.subplot(len(curves), 1, position)
    plt.plot(history.history[metric], 'r', linewidth=3.0)
    plt.plot(history.history['val_' + metric], 'b', linewidth=3.0)
    plt.legend(legend_entries, fontsize=18)
    plt.xlabel('Epochs ', fontsize=16)
    plt.ylabel(label, fontsize=16)
    plt.title(label + ' Curves', fontsize=16)

# Keep the upper plot's x-label from overlapping the lower plot's title, and
# render explicitly so the cell does not echo the last Text object.
plt.tight_layout()
plt.show()

Text(0.5, 1.0, 'Accuracy Curves')

Saving the Trained Model¶

# Save the trained model to an .h5 file, which the deepCC step below takes as
# its input.
model.save("brain_tumor_model.h5")

Compilation with DeepC¶

# Compile the saved Keras model with deepCC. Per the log below it is converted
# to ONNX, emitted as C++ and built into an executable.
!deepCC brain_tumor_model.h5

reading [keras model] from 'brain_tumor_model.h5'
Saved 'brain_tumor_model.onnx'
reading onnx model from file  brain_tumor_model.onnx
Model info:
  ir_vesion :  5 
  doc       : 
WARN (ONNX): terminal (input/output) conv2d_input's shape is less than 1.
             changing it to 1.
WARN (ONNX): terminal (input/output) dense's shape is less than 1.
             changing it to 1.
WARN (GRAPH): found operator node with the same name (dense) as io node.
running DNNC graph sanity check ... passed.
Writing C++ file  brain_tumor_model_deepC/brain_tumor_model.cpp
INFO (ONNX): model files are ready in dir brain_tumor_model_deepC
g++ -std=c++11 -O3 -I. -I/opt/tljh/user/lib/python3.7/site-packages/deepC-0.13-py3.7-linux-x86_64.egg/deepC/include -isystem /opt/tljh/user/lib/python3.7/site-packages/deepC-0.13-py3.7-linux-x86_64.egg/deepC/packages/eigen-eigen-323c052e1731 brain_tumor_model_deepC/brain_tumor_model.cpp -o brain_tumor_model_deepC/brain_tumor_model.exe
Model executable  brain_tumor_model_deepC/brain_tumor_model.exe

Model Files
brain_tumor_model.h5 keras Model
deepSea Compiled Models
brain_tumor_model.exe deepSea Ubuntu