NOTE: This Use Case is not purposed for resource constrained devices.
Parkinson's Disease Detection using Parkinson's Spiral Drawing and CNN¶
Credit: AITS Cainvas Community
Photo by Lea Filipo on Dribbble
The dataset was taken from this link
Importing the required Libraries and packages¶
In [1]:
import numpy as np
import cv2
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Dropout, Flatten, Dense, MaxPool2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.optimizers import Adam, SGD
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
from tensorflow.keras.regularizers import l2
import tensorflow as tf
import pandas as pd
In [2]:
!wget https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/Parkinson_disease_detection.zip
!unzip -qo Parkinson_disease_detection.zip
Loading Dataset¶
In [3]:
data_train = np.load('Parkinson_disease_detection/train_set.npz', allow_pickle=True)
x_train = data_train['arr_0']
y_train = data_train['arr_1']
print(x_train.shape)
print(y_train.shape)
In [4]:
data_test = np.load('Parkinson_disease_detection/test_set.npz', allow_pickle=True)
x_test = data_test['arr_0']
y_test = data_test['arr_1']
print(x_test.shape)
print(y_test.shape)
Data Distribution¶
In [5]:
unique_train, count = np.unique(y_train, return_counts=True)
plt.figure(figsize=(20, 10))
sns.barplot(unique_train, count).set_title("Number of training images per category:")
plt.show()
In [6]:
unique_test, count_test = np.unique(y_test, return_counts=True)
plt.figure(figsize=(20, 10))
sns.barplot(unique_test, count_test).set_title("Number of testing images per category:")
plt.show()
As can be seen, though the dataset is balanced but contains very less data points
Augmenting the Dataset¶
In [7]:
train_data_generator = ImageDataGenerator(rotation_range=360,
width_shift_range=0.0,
height_shift_range=0.0,
# brightness_range=[0.5, 1.5],
horizontal_flip=True,
vertical_flip=True)
x = list(x_train)
y = list(y_train)
x_aug_train = []
y_aug_train = []
for (i, v) in enumerate(y):
x_img = x[i]
x_img = np.array(x_img)
x_img = np.expand_dims(x_img, axis=0)
aug_iter = train_data_generator.flow(x_img, batch_size=1, shuffle=True)
for j in range(70):
aug_image = next(aug_iter)[0].astype('uint8')
x_aug_train.append(aug_image)
y_aug_train.append(v)
print(len(x_aug_train))
print(len(y_aug_train))
x_train = x + x_aug_train
y_train = y + y_aug_train
print(len(x_train))
print(len(y_train))
test_data_generator = ImageDataGenerator(rotation_range=360,
width_shift_range=0.0,
height_shift_range=0.0,
# brightness_range=[0.5, 1.5],
horizontal_flip=True,
vertical_flip=True)
x = list(x_test)
y = list(y_test)
x_aug_test = []
y_aug_test = []
for (i, v) in enumerate(y):
x_img = x[i]
x_img = np.array(x_img)
x_img = np.expand_dims(x_img, axis=0)
aug_iter = test_data_generator.flow(x_img, batch_size=1, shuffle=True)
for j in range(20):
aug_image = next(aug_iter)[0].astype('uint8')
x_aug_test.append(aug_image)
y_aug_test.append(v)
print(len(x_aug_test))
print(len(y_aug_test))
x_test = x + x_aug_test
y_test = y + y_aug_test
print(len(x_test))
print(len(y_test))
In [8]:
# print(y_test)
figure1 = plt.figure(figsize=(5, 5))
idx_healthy = [i for (i, v) in enumerate(y_train) if v=='healthy']
img_healthy = x_train[idx_healthy[-1]]
plt.imshow(img_healthy)
plt.title('Spiral Drawing by a Healthy Person')
plt.axis('off')
plt.show()
figure2 = plt.figure(figsize=(5, 5))
idx_parkinson = [i for (i, v) in enumerate(y_train) if v=='parkinson']
img_parkinson = x_train[idx_parkinson[-1]]
plt.imshow(img_parkinson)
plt.title("Spiral Drawing by a Person having Parkinson's Disease")
plt.axis('off')
plt.show()
Images in Test Set¶
In [9]:
figure1 = plt.figure(figsize=(5, 5))
idx_healthy = [i for (i, v) in enumerate(y_test) if v=='healthy']
img_healthy = x_test[idx_healthy[-1]]
plt.imshow(img_healthy)
plt.title('Spiral Drawing by a Healthy Person')
plt.axis('off')
plt.show()
figure2 = plt.figure(figsize=(5, 5))
idx_parkinson = [i for (i, v) in enumerate(y_test) if v=='parkinson']
img_parkinson = x_test[idx_parkinson[-1]]
plt.imshow(img_parkinson)
plt.title("Spiral Drawing by a Person having Parkinson's Disease")
plt.axis('off')
plt.show()
Preprocessing the Images¶
In [10]:
for i in range(len(x_train)):
img = x_train[i]
img = cv2.resize(img, (128, 128))
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
x_train[i] = img
for i in range(len(x_test)):
img = x_test[i]
img = cv2.resize(img, (128, 128))
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
x_test[i] = img
x_train = np.array(x_train)
x_test = np.array(x_test)
x_train = x_train/255.0
x_test = x_test/255.0
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
print(y_train.shape)
label_encoder = LabelEncoder()
y_test = label_encoder.fit_transform(y_test)
print(y_test.shape)
Data Distribution after Augmentation¶
In [11]:
unique_train, count = np.unique(y_train, return_counts=True)
plt.figure(figsize=(20, 10))
sns.barplot(unique_train, count).set_title("Number of training images per category after augmentation:")
plt.show()
In [12]:
unique_test, count_test = np.unique(y_test, return_counts=True)
plt.figure(figsize=(20, 10))
sns.barplot(unique_test, count_test).set_title("Number of test set images per category after augmentation:")
plt.show()
In [13]:
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
Defining the Model¶
In [14]:
def parkinson_disease_detection_model(input_shape=(128, 128, 1)):
regularizer = tf.keras.regularizers.l2(0.001)
model = Sequential()
model.add(Input(shape=input_shape))
model.add(Conv2D(128, (5, 5), padding='same', strides=(1, 1), name='conv1', activation='relu',
kernel_initializer='glorot_uniform', kernel_regularizer=regularizer))
model.add(MaxPool2D((9, 9), strides=(3, 3)))
model.add(Conv2D(64, (5, 5), padding='same', strides=(1, 1), name='conv2', activation='relu',
kernel_initializer='glorot_uniform', kernel_regularizer=regularizer))
model.add(MaxPool2D((7, 7), strides=(3, 3)))
model.add(Conv2D(32, (3, 3), padding='same', strides=(1, 1), name='conv3', activation='relu',
kernel_initializer='glorot_uniform', kernel_regularizer=regularizer))
model.add(MaxPool2D((5, 5), strides=(2, 2)))
model.add(Conv2D(32, (3, 3), padding='same', strides=(1, 1), name='conv4', activation='relu',
kernel_initializer='glorot_uniform', kernel_regularizer=regularizer))
model.add(MaxPool2D((3, 3), strides=(2, 2)))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu', kernel_initializer='glorot_uniform', name='fc1'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax', kernel_initializer='glorot_uniform', name='fc3'))
optimizer = Adam(3.15e-5)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
return model
In [15]:
model= parkinson_disease_detection_model(input_shape=(128, 128, 1))
model.summary()
Training the Model¶
In [16]:
hist = model.fit(x_train, y_train, batch_size=128, epochs=70, validation_data=(x_test, y_test))
Loss and Accuracy Plot¶
In [17]:
figure = plt.figure(figsize=(10, 10))
plt.plot(hist.history['accuracy'], label='Train_accuracy')
plt.plot(hist.history['val_accuracy'], label='Test_accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc="upper left")
plt.show()
figure2 = plt.figure(figsize=(10, 10))
plt.plot(hist.history['loss'], label='Train_loss')
plt.plot(hist.history['val_loss'], label='Test_loss')
plt.title('Model Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc="upper left")
plt.show()
Classification Report¶
In [18]:
ypred = model.predict(x_test)
ypred = np.argmax(ypred, axis=1)
y_test_pred = np.argmax(y_test, axis=1)
print(classification_report(y_test_pred, ypred))
Confusion Matrix¶
In [19]:
matrix = confusion_matrix(y_test_pred, ypred)
df_cm = pd.DataFrame(matrix, index=[0, 1], columns=[0, 1])
figure = plt.figure(figsize=(5, 5))
sns.heatmap(df_cm, annot=True, fmt='d')
Out[19]:
Saving the Model¶
In [20]:
model.save('parkinson_disease_detection.h5')
Testing Model on Images¶
In [21]:
labels = ['Healthy', 'Parkinson']
image_healthy = cv2.imread('Parkinson_disease_detection/test_image_healthy.png')
image_parkinson = cv2.imread('Parkinson_disease_detection/test_image_parkinson.png')
image_healthy = cv2.resize(image_healthy, (128, 128))
image_healthy = cv2.cvtColor(image_healthy, cv2.COLOR_BGR2GRAY)
image_healthy = np.array(image_healthy)
image_healthy = np.expand_dims(image_healthy, axis=0)
image_healthy = np.expand_dims(image_healthy, axis=-1)
image_parkinson = cv2.resize(image_parkinson, (128, 128))
image_parkinson = cv2.cvtColor(image_parkinson, cv2.COLOR_BGR2GRAY)
image_parkinson = np.array(image_parkinson)
image_parkinson = np.expand_dims(image_parkinson, axis=0)
image_parkinson = np.expand_dims(image_parkinson, axis=-1)
In [22]:
ypred_healthy = model.predict(image_healthy)
ypred_parkinson = model.predict(image_parkinson)
In [23]:
figure = plt.figure(figsize=(2, 2))
img_healthy = np.squeeze(image_healthy, axis=0)
plt.imshow(img_healthy)
plt.axis('off')
plt.title(f'Prediction by the model: {labels[np.argmax(ypred_healthy[0], axis=0)]}')
plt.show()
In [24]:
figure = plt.figure(figsize=(2, 2))
image_parkinson = np.squeeze(image_parkinson, axis=0)
plt.imshow(image_parkinson)
plt.axis('off')
plt.title(f'Prediction by the model: {labels[np.argmax(ypred_parkinson[0], axis=0)]}')
plt.show()
deepCC¶
In [25]:
!deepCC parkinson_disease_detection.h5