Cainvas

Disease Classification with CNN

Credit: AITS Cainvas Community

Photo by Extrafazant on Dribbble

Importing the dataset

In [1]:
!wget -N "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/MedNIST.zip"
!unzip -qo "MedNIST.zip"
!rm "MedNIST.zip"
--2021-08-01 09:25:59--  https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/MedNIST.zip
Resolving cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)... 52.219.64.28
Connecting to cainvas-static.s3.amazonaws.com (cainvas-static.s3.amazonaws.com)|52.219.64.28|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 110557154 (105M) [application/x-zip-compressed]
Saving to: ‘MedNIST.zip’

MedNIST.zip         100%[===================>] 105.43M  98.2MB/s    in 1.1s    

2021-08-01 09:26:00 (98.2 MB/s) - ‘MedNIST.zip’ saved [110557154/110557154]

Allocating test and train directories

In [2]:
test_dir = "Medical/Medical_test"
train_dir = "Medical/Medical_train"

Importing libraries

In [3]:
import os
import numpy as np
import pandas as pd
import random, datetime, os, shutil, math

Defining a function for preparation of the test dataset

In [4]:
def prep_test_data(med, train_dir, test_dir):
  pop = os.listdir(train_dir+'/'+med)
  test_data=random.sample(pop, 2000)
  #print(test_data)
  for f in test_data:
    shutil.copy(train_dir+'/'+med+'/'+f, test_dir+'/'+med+'/')
In [5]:
for medi in os.listdir(train_dir):
  prep_test_data(medi, train_dir, test_dir)
In [6]:
target_classes = os.listdir(train_dir)
num_classes = len(target_classes)
print('Number of target classes:', num_classes)
print(list(enumerate(target_classes)))
Number of target classes: 6
[(0, 'AbdomenCT'), (1, 'ChestCT'), (2, 'Hand'), (3, 'HeadCT'), (4, 'CXR'), (5, 'BreastMRI')]
In [7]:
target_classes = os.listdir(test_dir)
num_classes = len(target_classes)
print('Number of target classes:', num_classes)
print(list(enumerate(target_classes)))
Number of target classes: 6
[(0, 'AbdomenCT'), (1, 'ChestCT'), (2, 'Hand'), (3, 'HeadCT'), (4, 'CXR'), (5, 'BreastMRI')]
In [8]:
#shutil.rmtree("Medical/Medical_test/.ipynb_checkpoints")

Assigning training and test set distributions

In [9]:
training_set_distribution = [len(os.listdir(os.path.join(train_dir, dir))) for dir in os.listdir(train_dir)]
testing_set_distribution = [len(os.listdir(os.path.join(test_dir, dir))) for dir in os.listdir(test_dir)]

Defining a function to display sample image

In [10]:
def show_mri(med):
  num = len(med)
  if num == 0:
    return None
  rows = int(math.sqrt(num))
  cols = (num+1)//rows
  f, axs = plt.subplots(rows, cols)
  fig = 0
  for b in med:
    img = image.load_img(b)
    row = fig // cols
    col = fig % cols
    axs[row, col].imshow(img)
    fig += 1
  plt.show()
In [11]:
import matplotlib.pyplot as plt
from matplotlib.image import imread
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
In [12]:
dir_name = os.path.join(train_dir,"AbdomenCT")
all_images = [os.path.join(dir_name, fname) for fname in os.listdir(dir_name)]
show_mri(all_images[:9])

Image Pre-processing

In [13]:
image_size = (32, 32, 3)
datagen=ImageDataGenerator(rescale = 1./255,
                           shear_range=0.2,
                           zoom_range=0.2,
                           horizontal_flip=True,
                           )
In [14]:
training_set=datagen.flow_from_directory(train_dir,
                                         target_size=image_size[:2],
                                         batch_size=32,
                                         class_mode='categorical',
                                         shuffle=False
                                         #color_mode='rgb'
                                         )
Found 58954 images belonging to 6 classes.
In [15]:
validation_set=datagen.flow_from_directory(test_dir,
                                           target_size=image_size[:2],
                                           batch_size=32,
                                           class_mode='categorical',
                                           shuffle=False
                                           )
Found 29110 images belonging to 6 classes.

Importing and using Callbacks

In [16]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import plot_model
In [17]:
#!pip install pydot
#!pip install graphviz
#!pip install pydotplus
#!sudo apt-get install graphviz
In [18]:
es = EarlyStopping(monitor='val_acc', mode='max', verbose=1, patience=7)
filepath = "modelMedicalMNIST.h5"
ckpt = ModelCheckpoint(filepath, monitor='acc', verbose=1, save_best_only=True, mode='max')
rlp = ReduceLROnPlateau(monitor='acc', patience=3, verbose=1)

Defining CNN model

In [19]:
def cnn(image_size, num_classes):
    classifier = Sequential()
    classifier.add(Conv2D(64, (5, 5), input_shape=image_size, activation='relu', padding='same'))
    classifier.add(MaxPooling2D(pool_size = (2, 2)))
    classifier.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    classifier.add(MaxPooling2D(pool_size = (2, 2)))
    classifier.add(Flatten())
    classifier.add(Dense(num_classes, activation = 'softmax'))
    classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
    return classifier

neuralnetwork_cnn = cnn(image_size, num_classes)
neuralnetwork_cnn.summary()
#plot_model(neuralnetwork_cnn, show_shapes=True) 
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 32, 32, 64)        4864      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 16, 16, 128)       73856     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 8, 8, 128)         0         
_________________________________________________________________
flatten (Flatten)            (None, 8192)              0         
_________________________________________________________________
dense (Dense)                (None, 6)                 49158     
=================================================================
Total params: 127,878
Trainable params: 127,878
Non-trainable params: 0
_________________________________________________________________

Using Fit()

In [20]:
history = neuralnetwork_cnn.fit_generator(
    generator=training_set, validation_data=validation_set,
    callbacks=[es, ckpt, rlp], epochs = 5, 
)
WARNING:tensorflow:From <ipython-input-20-d53212ee6f04>:3: Model.fit_generator (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.
Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/5
1841/1843 [============================>.] - ETA: 0s - loss: 0.2316 - acc: 0.9245
Epoch 00001: acc improved from -inf to 0.92455, saving model to modelMedicalMNIST.h5
1843/1843 [==============================] - 49s 27ms/step - loss: 0.2314 - acc: 0.9246 - val_loss: 0.0340 - val_acc: 0.9930
Epoch 2/5
1841/1843 [============================>.] - ETA: 0s - loss: 0.0330 - acc: 0.9917
Epoch 00002: acc improved from 0.92455 to 0.99167, saving model to modelMedicalMNIST.h5
1843/1843 [==============================] - 49s 26ms/step - loss: 0.0329 - acc: 0.9917 - val_loss: 0.0234 - val_acc: 0.9949
Epoch 3/5
1841/1843 [============================>.] - ETA: 0s - loss: 0.0412 - acc: 0.9886
Epoch 00003: acc did not improve from 0.99167
1843/1843 [==============================] - 49s 26ms/step - loss: 0.0412 - acc: 0.9886 - val_loss: 0.0179 - val_acc: 0.9958
Epoch 4/5
1841/1843 [============================>.] - ETA: 0s - loss: 0.0159 - acc: 0.9963
Epoch 00004: acc improved from 0.99167 to 0.99632, saving model to modelMedicalMNIST.h5
1843/1843 [==============================] - 49s 26ms/step - loss: 0.0159 - acc: 0.9963 - val_loss: 0.0081 - val_acc: 0.9977
Epoch 5/5
1841/1843 [============================>.] - ETA: 0s - loss: 0.0308 - acc: 0.9947
Epoch 00005: acc did not improve from 0.99632
1843/1843 [==============================] - 49s 26ms/step - loss: 0.0307 - acc: 0.9947 - val_loss: 0.0082 - val_acc: 0.9978

Displaying loss and accuracy graph

In [21]:
fig, ax = plt.subplots(figsize=(20, 6))
pd.DataFrame(history.history).iloc[:, :-1].plot(ax=ax)
Out[21]:
<AxesSubplot:>

Predicting on validation set

In [22]:
batch_size=32
pred=neuralnetwork_cnn.predict_generator(validation_set,steps=306/batch_size)
predicted_class_indices=np.argmax(pred,axis=1)
WARNING:tensorflow:From <ipython-input-22-e16df5100bce>:2: Model.predict_generator (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.
Instructions for updating:
Please use Model.predict, which supports generators.
In [23]:
labels = (validation_set.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

Displaying the category of image and predicted image

In [24]:
filenames=validation_set.filenames[0]
results=pd.DataFrame({"Filename":filenames,
                      "Predictions":predictions})
In [25]:
display(results.head(50))
Filename Predictions
0 AbdomenCT/000001.jpeg AbdomenCT
1 AbdomenCT/000001.jpeg AbdomenCT
2 AbdomenCT/000001.jpeg AbdomenCT
3 AbdomenCT/000001.jpeg AbdomenCT
4 AbdomenCT/000001.jpeg AbdomenCT
5 AbdomenCT/000001.jpeg AbdomenCT
6 AbdomenCT/000001.jpeg ChestCT
7 AbdomenCT/000001.jpeg AbdomenCT
8 AbdomenCT/000001.jpeg AbdomenCT
9 AbdomenCT/000001.jpeg AbdomenCT
10 AbdomenCT/000001.jpeg AbdomenCT
11 AbdomenCT/000001.jpeg AbdomenCT
12 AbdomenCT/000001.jpeg ChestCT
13 AbdomenCT/000001.jpeg AbdomenCT
14 AbdomenCT/000001.jpeg AbdomenCT
15 AbdomenCT/000001.jpeg AbdomenCT
16 AbdomenCT/000001.jpeg AbdomenCT
17 AbdomenCT/000001.jpeg AbdomenCT
18 AbdomenCT/000001.jpeg AbdomenCT
19 AbdomenCT/000001.jpeg AbdomenCT
20 AbdomenCT/000001.jpeg AbdomenCT
21 AbdomenCT/000001.jpeg AbdomenCT
22 AbdomenCT/000001.jpeg AbdomenCT
23 AbdomenCT/000001.jpeg AbdomenCT
24 AbdomenCT/000001.jpeg AbdomenCT
25 AbdomenCT/000001.jpeg AbdomenCT
26 AbdomenCT/000001.jpeg AbdomenCT
27 AbdomenCT/000001.jpeg AbdomenCT
28 AbdomenCT/000001.jpeg AbdomenCT
29 AbdomenCT/000001.jpeg AbdomenCT
30 AbdomenCT/000001.jpeg AbdomenCT
31 AbdomenCT/000001.jpeg AbdomenCT
32 AbdomenCT/000001.jpeg AbdomenCT
33 AbdomenCT/000001.jpeg AbdomenCT
34 AbdomenCT/000001.jpeg AbdomenCT
35 AbdomenCT/000001.jpeg AbdomenCT
36 AbdomenCT/000001.jpeg AbdomenCT
37 AbdomenCT/000001.jpeg AbdomenCT
38 AbdomenCT/000001.jpeg AbdomenCT
39 AbdomenCT/000001.jpeg AbdomenCT
40 AbdomenCT/000001.jpeg AbdomenCT
41 AbdomenCT/000001.jpeg AbdomenCT
42 AbdomenCT/000001.jpeg AbdomenCT
43 AbdomenCT/000001.jpeg AbdomenCT
44 AbdomenCT/000001.jpeg AbdomenCT
45 AbdomenCT/000001.jpeg AbdomenCT
46 AbdomenCT/000001.jpeg AbdomenCT
47 AbdomenCT/000001.jpeg AbdomenCT
48 AbdomenCT/000001.jpeg AbdomenCT
49 AbdomenCT/000001.jpeg AbdomenCT

DeepCC

In [27]:
!deepCC modelMedicalMNIST.h5
[INFO]
Reading [keras model] 'modelMedicalMNIST.h5'
[SUCCESS]
Saved 'modelMedicalMNIST_deepC/modelMedicalMNIST.onnx'
[INFO]
Reading [onnx model] 'modelMedicalMNIST_deepC/modelMedicalMNIST.onnx'
[INFO]
Model info:
  ir_vesion : 5
  doc       : 
[WARNING]
[ONNX]: graph-node conv2d's attribute auto_pad has no meaningful data.
[WARNING]
[ONNX]: graph-node conv2d_1's attribute auto_pad has no meaningful data.
[WARNING]
[ONNX]: terminal (input/output) conv2d_input's shape is less than 1. Changing it to 1.
[WARNING]
[ONNX]: terminal (input/output) dense's shape is less than 1. Changing it to 1.
WARN (GRAPH): found operator node with the same name (dense) as io node.
[INFO]
Running DNNC graph sanity check ...
[SUCCESS]
Passed sanity check.
[INFO]
Writing C++ file 'modelMedicalMNIST_deepC/modelMedicalMNIST.cpp'
[INFO]
deepSea model files are ready in 'modelMedicalMNIST_deepC/' 
[RUNNING COMMAND]
g++ -std=c++11 -O3 -fno-rtti -fno-exceptions -I. -I/opt/tljh/user/lib/python3.7/site-packages/deepC-0.13-py3.7-linux-x86_64.egg/deepC/include -isystem /opt/tljh/user/lib/python3.7/site-packages/deepC-0.13-py3.7-linux-x86_64.egg/deepC/packages/eigen-eigen-323c052e1731 "modelMedicalMNIST_deepC/modelMedicalMNIST.cpp" -D_AITS_MAIN -o "modelMedicalMNIST_deepC/modelMedicalMNIST.exe"
[RUNNING COMMAND]
size "modelMedicalMNIST_deepC/modelMedicalMNIST.exe"
   text	   data	    bss	    dec	    hex	filename
 675357	   3792	    760	 679909	  a5fe5	modelMedicalMNIST_deepC/modelMedicalMNIST.exe
[SUCCESS]
Saved model as executable "modelMedicalMNIST_deepC/modelMedicalMNIST.exe"
In [ ]: