In [1]:
# Import all the necessary libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Sequential
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
Unzip the dataset¶
In [2]:
# Download the zipped dataset archive into the current working directory
!wget 'https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/data_1lZhcJn.zip'
# Extract quietly (-q), overwriting any existing files without prompting (-o)
!unzip -qo data_1lZhcJn.zip
# Remove the archive once its contents have been extracted
!rm data_1lZhcJn.zip
Loading and displaying the data¶
In [3]:
# Read the extracted CSV into a DataFrame; the comma separator is pandas'
# default, so it does not need to be spelled out.
data = pd.read_csv('data.csv')
# Preview the first three rows
data.head(n=3)
Out[3]:
In [4]:
# Report how many missing values each column holds
missing_per_column = data.isna().sum()
print(missing_per_column)
# Discard every column that contains at least one missing value
data = data.dropna(axis="columns")
In [5]:
# Show the first two rows of the frame after the missing-value columns were dropped
data.head(2)
Out[5]:
Visualising the Features of the Data¶
In [6]:
# Draw one histogram per column of the frame on a single 18x18 inch figure
data.hist(figsize = (18,18))
Out[6]:
In [7]:
# Plotting a heatmap/correlation plot to see how different values are related to each other
# Correlation is only defined for numeric columns. The label column is encoded
# in a later cell, so it is presumably still text here; since pandas 2.0
# DataFrame.corr() raises on non-numeric columns instead of silently skipping
# them, so restrict the frame to numeric dtypes explicitly.
numeric_data = data.select_dtypes(include='number')
plt.figure(figsize=(27,24))
sns.heatmap(numeric_data.corr(), annot=False, linewidths=2)
plt.show()
Pre processing the Data¶
In [8]:
# Encoding our diagnostics using label encoder
# The second column holds the class labels; LabelEncoder maps each distinct
# label to an integer code (0 .. n_classes-1).
le = LabelEncoder()
label_column = data.columns[1]
# Assign by column name rather than through `.iloc[:, 1] = ...`: under
# pandas 2.x the positional form writes into the existing column in place,
# leaving it with `object` dtype, whereas replacing the column gives it the
# integer dtype produced by the encoder.
data[label_column] = le.fit_transform(data.iloc[:, 1].values)
In [9]:
print(data.shape)

# Feature matrix: columns at positions 2..30; target vector: column at position 1.
# NOTE(review): the slice stops before position 31, so if the frame has 32
# columns at this point the last one is not used as a feature -- confirm
# against the shape printed above that this is intended.
X = data.iloc[:, 2:31].to_numpy()
y = data.iloc[:, 1].to_numpy()

# Splitting our dataset into train-test split: 70% train / 30% test,
# stratified on the labels so both parts keep the same class proportions,
# with a fixed seed for repeatability.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
    stratify=y,
)
In [10]:
# Feature scaling: learn per-feature mean and standard deviation from the
# training split only, then apply the same transform to both splits so no
# statistics from the test split are used during fitting.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
In [11]:
# One-hot encode the integer class labels of both splits; the number of
# classes is inferred from the labels (the default when num_classes is omitted).
from tensorflow.keras.utils import to_categorical
Y_train, Y_test = (to_categorical(labels) for labels in (Y_train, Y_test))
# Show the resulting array shapes of the training targets and inputs
print("Y = ", Y_train.shape)
print("X = ", X_train.shape)
Defining our DL Model¶
In [12]:
# Defining the architecture of our deep learning model:
# two 100-unit ReLU hidden layers with 20% dropout after the first, followed
# by a 2-unit softmax output layer.
# The input width is read from the training matrix instead of being
# hard-coded, so the model keeps matching the data if the feature selection
# changes.
n_features = X_train.shape[1]
model = Sequential()
model.add(Dense(100, activation = "relu", input_dim = n_features))
model.add(Dropout(0.2))
model.add(Dense(100, activation = "relu"))
model.add(Dense(2, activation = "softmax"))
model.summary()
In [13]:
# Configure training: Adam optimiser, categorical cross-entropy loss (the
# targets are one-hot vectors), and accuracy reported as the tracked metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
In [14]:
# Train for 100 epochs with mini-batches of 35 samples. The held-out split is
# passed as validation data, so its loss/accuracy are recorded after every
# epoch alongside the training values in the returned History object.
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=35,
    epochs=100,
    validation_data=(X_test, Y_test),
)
In [15]:
# Function to plot "accuracy vs epoch" graphs and "loss vs epoch" graphs for training and validation data
def plot_metrics(model_name, metric = 'accuracy'):
    """Plot the per-epoch training and validation curves of one metric.

    Parameters
    ----------
    model_name : object
        Object exposing a ``history`` mapping from metric name to a sequence
        of per-epoch values (the value returned by ``model.fit``). Despite
        the parameter name, this is the training history, not a model.
    metric : str, default 'accuracy'
        ``'loss'`` plots the ``'loss'`` / ``'val_loss'`` series; any other
        value plots ``'accuracy'`` / ``'val_accuracy'``.

    Raises
    ------
    KeyError
        If the selected series are missing from ``model_name.history``.
    """
    # Select the title, axis label and history keys once so the drawing code
    # below is shared by both kinds of plot.
    if metric == 'loss':
        title, y_label = "Loss Values", "Loss"
        train_key, val_key = 'loss', 'val_loss'
    else:
        title, y_label = "Accuracy Values", "Accuracy"
        train_key, val_key = 'accuracy', 'val_accuracy'

    plt.title(title)
    plt.plot(model_name.history[train_key], label = 'train')
    plt.plot(model_name.history[val_key], label = 'test')
    # Label the axes so the figure can be read on its own.
    plt.xlabel("Epoch")
    plt.ylabel(y_label)
    plt.legend()
    plt.show()
In [16]:
# Draw the accuracy curves first, then the loss curves, from the training history
for metric_name in ('accuracy', 'loss'):
    plot_metrics(history, metric_name)
Save the Model File¶
In [17]:
# Saving our trained model
from tensorflow.keras.models import save_model
# Always write the freshly trained model. Skipping the save when
# 'best_model.h5' already exists would leave a file from an earlier run on
# disk, and the later cells that reload and compile 'best_model.h5' would then
# operate on that stale model instead of the one trained above.
model.save('best_model.h5')
Checking the Accuracy of the Model by Predicting¶
In [18]:
#Plotting a confusion matrix for checking the performance of our model
# Turn the one-hot targets and the softmax outputs back into class indices.
Y_true = Y_test.argmax(axis = 1)
Y_pred = np.argmax(model.predict(X_test), axis = 1)
# Pin the label set so the matrix is always 2x2, even if one class never
# appears among the predictions.
cnf = confusion_matrix(Y_true, Y_pred, labels = [0, 1])
df_cnf = pd.DataFrame(cnf, range(2), range(2))
sns.set(font_scale = 2)
# fmt='d' prints the cell counts as plain integers; the default format
# switches to scientific notation for counts of 100 or more.
sns.heatmap(df_cnf, annot = True, fmt = 'd')
plt.title("Confusion Matrix")
# confusion_matrix puts true labels on the rows and predictions on the
# columns, so the heatmap's x-axis is the prediction and its y-axis the truth.
plt.xlabel("Predicted Values")
plt.ylabel("True Values")
plt.show()
IoT Application of This Project¶
Utilising this machine learning model to develop equipment that detects breast cancer can go a long way in helping patients detect such problems at an early stage and may lead to an early recovery. Data collected from such equipment will help physicians devise the right course of action for their patients for a smooth transition to normal life after proper recovery. There is no doubt that the application of such machine learning and deep learning models in healthcare, in designing such IoT equipment, has had a huge impact on enhancing the quality of treatment given to patients.
In [19]:
# Reload the model from the HDF5 file on disk, replacing the in-memory `model`
from tensorflow.keras import models
model = models.load_model(filepath='best_model.h5')
In [20]:
# Run the deepCC command-line tool on the saved model file
# NOTE(review): presumably this compiles the Keras model for deployment on a
# target device -- confirm against the deepCC documentation.
!deepCC best_model.h5
In [ ]: