Age and gender have always been important features of our identity, and they are important factors in our social life. Age and gender predictions made with AI can be applied in many areas, such as intelligent human-machine interface development, security, cosmetics, and electronic commerce.
Import the Dataset¶
In [1]:
# Download the dataset archive (-N turns on wget's timestamp check against any local copy).
!wget -N "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/age_gender.zip"
# Extract quietly (-q) and overwrite existing files without prompting (-o).
!unzip -qo age_gender.zip
# Delete the archive once its contents are extracted.
!rm age_gender.zip
Import necessary Libraries¶
In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Conv2D,InputLayer, Dropout, BatchNormalization, Flatten, Dense, MaxPooling2D
from tensorflow.keras import utils
from tensorflow.keras.models import Sequential
from keras.callbacks import ModelCheckpoint
Data Analysis¶
In [3]:
# Read the labelled face data from the extracted CSV and preview its first five rows.
Dataset = pd.read_csv(filepath_or_buffer='age_gender.csv')
Dataset.head(n=5)
Out[3]:
In [4]:
# Summary statistics (count, mean, std, quartiles, ...) for the dataset's columns.
Dataset.describe()
Out[4]:
In [5]:
# Transform the pixels column from space-separated strings into 48x48 numpy arrays.
def _parse_pixels(raw):
    """Return one image as a (48, 48) float32 array.

    Accepts either the space-separated pixel string read from the CSV or a
    value that has already been converted, so re-running this cell is safe
    (the original lambda raised AttributeError on the second run because
    arrays have no ``split`` method).
    """
    if isinstance(raw, str):
        raw = raw.split(' ')
    return np.asarray(raw, dtype=np.float32).reshape(48, 48)


Dataset['pixels'] = Dataset['pixels'].map(_parse_pixels)
In [6]:
# Plot a histogram of the 'age' column to see how ages are distributed.
Dataset['age'].hist()
Out[6]:
In [7]:
# Bucket ages into five ordinal groups used for the stratified split:
# (0, 20], (20, 40], (40, 60], (60, 80] and (80, inf].
age_bin_edges = [0., 20., 40.0, 60., 80., np.inf]
age_bin_labels = [1, 2, 3, 4, 5]
Dataset["age_cat"] = pd.cut(Dataset["age"], bins=age_bin_edges, labels=age_bin_labels)
In [8]:
# Count how many rows fall into each age category.
Dataset["age_cat"].value_counts()
Out[8]:
Creating the Datasets for AgeModel and GenderModel¶
In [9]:
from sklearn.model_selection import StratifiedShuffleSplit

# One 80/20 shuffle split that preserves the age-category proportions.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# n_splits=1, so take the single (train, test) pair directly instead of looping.
train_index, test_index = next(split.split(Dataset, Dataset["age_cat"]))

# split() yields positional row indices, so select with .iloc; the previous
# label-based .loc lookup was only correct while Dataset kept its default
# 0..n-1 index. .copy() makes the subsets independent of Dataset.
strat_train_set = Dataset.iloc[train_index].copy()
strat_test_set = Dataset.iloc[test_index].copy()
In [10]:
def age_cat_proportions(data):
    """Return the share of rows in each ``age_cat`` value.

    Counts are divided by the total number of rows in ``data``, so rows whose
    ``age_cat`` is missing still count towards the denominator.
    """
    category_counts = data["age_cat"].value_counts()
    total_rows = len(data)
    return category_counts / total_rows
# Purely random 80/20 split, used only as a baseline for the comparison below.
train_set, test_set = train_test_split(Dataset, test_size=0.2, random_state=42)

# Age-category proportions in the full data, the stratified test set and the random test set.
compare_props = pd.DataFrame({
    "Overall": age_cat_proportions(Dataset),
    "Stratified": age_cat_proportions(strat_test_set),
    "Random": age_cat_proportions(test_set),
}).sort_index()

# Relative deviation (in percent) of each test set from the overall proportions.
for error_column, source_column in (("Rand. %error", "Random"), ("Strat. %error", "Stratified")):
    compare_props[error_column] = 100 * compare_props[source_column] / compare_props["Overall"] - 100
In [11]:
# Display the proportion comparison table built in the previous cell.
compare_props
Out[11]:
In [12]:
# The helper column was only needed for stratification. Reassigning (rather
# than dropping in place) together with errors="ignore" keeps this cell safe to
# re-run: the in-place version raised KeyError once the column was gone.
strat_train_set = strat_train_set.drop(columns="age_cat", errors="ignore")
strat_test_set = strat_test_set.drop(columns="age_cat", errors="ignore")
Data Visualization¶
In [13]:
# Display the stratified training subset.
strat_train_set
Out[13]:
In [14]:
# Recombine the two subsets into one frame. DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0; pd.concat is the supported equivalent and, like
# append, keeps the original row labels.
full_dataset = pd.concat([strat_train_set, strat_test_set])
In [15]:
# Preview the first rows of the recombined dataset.
full_dataset.head()
Out[15]:
In [16]:
# Display the stratified test subset.
strat_test_set
Out[16]:
In [17]:
# Frequency tables for the label columns (gender codes 0/1 are relabelled Male/Female).
age_dist = full_dataset['age'].value_counts()
ethnicity_dist = full_dataset['ethnicity'].value_counts()
gender_dist = full_dataset['gender'].value_counts().rename(index={0:'Male',1:'Female'})

# Scale every image by 1/255.
# NOTE: this rewrites the column, so running the cell twice scales the pixels twice.
full_dataset['pixels'] = full_dataset['pixels'].apply(lambda image: image / 255)
In [18]:
# Collect the per-row 48x48 images into one array of shape (n_samples, 48, 48).
X = np.asarray(full_dataset['pixels'].tolist())
# Add a trailing channel axis so each image is (48, 48, 1), matching the models' input.
X = X.reshape(len(X), 48, 48, 1)
Train-Test Split¶
In [19]:
# Split the data into train and test sets.
# This is a plain random 80/20 split of the recombined dataset, with both label
# arrays split alongside the images so the rows stay aligned.
np.random.seed(42)
y_age = np.array(full_dataset['age'])
y_gender = np.array(full_dataset['gender'])

for array_name, array in (('X', X), ('y_age', y_age), ('y_gender', y_gender)):
    print(array_name, array.shape)

(X_train, X_test,
 y_age_train, y_age_test,
 y_gender_train, y_gender_test) = train_test_split(
    X, y_age, y_gender, test_size=0.2, random_state=42)
In [20]:
def plot(X,y):
    """Show one image with ``y`` as the figure title.

    ``X`` is reshaped to 48x48 before display, so it must hold exactly
    48 * 48 values.
    """
    image = X.reshape(48,48)
    plt.imshow(image)
    plt.title(y)
    plt.show()
In [21]:
# Show one sample together with its own label. The image and the gender must
# come from the same row; previously the image was row 50 but the title used
# the gender of row 10.
sample_label = 50
plot(full_dataset.loc[sample_label, 'pixels'], full_dataset.loc[sample_label, 'gender'])
AgeModel Architecture¶
In [22]:
import tensorflow.keras.layers as L

# Reset Keras' global state before building a new model.
tf.keras.backend.clear_session()

# Small CNN regressor: two conv/pool stages, a dense layer with dropout, and a
# single linear output unit that predicts age.
AgeModel = tf.keras.Sequential([
    # The InputLayer alone defines the input shape (48x48 single-channel images).
    L.InputLayer(input_shape=(48, 48, 1)),
    # The stray input_shape=(32, 32, 3) that used to be passed here contradicted
    # the InputLayer above and has been removed.
    L.Conv2D(32, (3, 3), activation='relu'),
    L.BatchNormalization(),
    L.MaxPooling2D((2, 2)),
    L.Conv2D(64, (3, 3), activation='relu'),
    L.MaxPooling2D((2, 2)),
    L.Flatten(),
    L.Dense(64, activation='relu'),
    L.Dropout(rate=0.5),
    L.Dense(1),
])

# Regression objective: mean squared error between predicted and true age.
AgeModel.compile(optimizer='adam',
                 loss='mean_squared_error')
In [23]:
# Print the layer-by-layer summary of the age model.
AgeModel.summary()
AgeModel Training¶
In [24]:
# Write the model to ageModel.h5 whenever the validation loss reaches a new minimum.
checkpointer = ModelCheckpoint(
    filepath='ageModel.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=2,
)
In [25]:
# Train the age model, holding out 20% of the training data for validation;
# the checkpoint callback saves the best epoch to disk.
history = AgeModel.fit(
    x=X_train,
    y=y_age_train,
    batch_size=64,
    epochs=50,
    validation_split=0.2,
    callbacks=[checkpointer],
)
Training Plot¶
In [26]:
# Plot every metric recorded during training (one line per history key) against the epoch.
age_history_frame = pd.DataFrame(history.history)
age_history_axes = age_history_frame.plot(figsize=(8, 5))
age_history_axes.grid(True)
plt.show()
Evaluating AgeModel¶
In [27]:
# Mean squared error on the held-out test split. This evaluates the in-memory
# model as left by the last training epoch, not the checkpoint saved to ageModel.h5.
AgeModel.evaluate(X_test,y_age_test)
Out[27]:
In [28]:
# True ages of the first ten test samples, for comparison with the predictions below.
y_age_test[:10]
Out[28]:
In [29]:
# Predict ages for the first ten test images only, and show them rounded to whole years.
y_age_pred = AgeModel.predict(X_test[:10])
y_age_pred.round()
Out[29]:
GenderModel Architecture¶
In [30]:
## Gender Model
# Reset Keras' global state before building a new model.
tf.keras.backend.clear_session()

# Same convolutional backbone as the age model, but with a sigmoid output unit
# for binary classification.
GenderModel = tf.keras.Sequential([
    # The InputLayer alone defines the input shape (48x48 single-channel images).
    L.InputLayer(input_shape=(48, 48, 1)),
    # The stray input_shape=(32, 32, 3) that used to be passed here contradicted
    # the InputLayer above and has been removed.
    L.Conv2D(32, (3, 3), activation='relu'),
    L.BatchNormalization(),
    L.MaxPooling2D((2, 2)),
    L.Conv2D(64, (3, 3), activation='relu'),
    L.MaxPooling2D((2, 2)),
    L.Flatten(),
    L.Dense(64, activation='relu'),
    L.Dropout(rate=0.5),
    L.Dense(1, activation='sigmoid'),
])

# Binary cross-entropy on the sigmoid output; accuracy is tracked as a metric.
GenderModel.compile(optimizer='adam',
                    loss=tf.keras.losses.BinaryCrossentropy(),
                    metrics=['accuracy'])
In [31]:
# Print the layer-by-layer summary of the gender model.
GenderModel.summary()
GenderModel Training¶
In [32]:
# Train the gender classifier, holding out 20% of the training data for validation.
Gender_history = GenderModel.fit(
    x=X_train,
    y=y_gender_train,
    batch_size=64,
    epochs=18,
    validation_split=0.2,
)
Training Plot¶
In [33]:
# Persist the trained classifier (final-epoch weights) to disk.
GenderModel.save("GenderModel.h5")

# Plot every metric recorded during training against the epoch.
gender_history_frame = pd.DataFrame(Gender_history.history)
gender_history_axes = gender_history_frame.plot(figsize=(8, 5))
gender_history_axes.grid(True)
plt.show()
Evaluating the performance of model¶
In [34]:
# Loss and accuracy of the gender classifier on the held-out test split.
loss, acc = GenderModel.evaluate(X_test, y_gender_test, verbose=0)
for template, value in (('Test loss: {}', loss), ('Test Accuracy: {}', acc)):
    print(template.format(value))
In [35]:
# True gender labels of the first ten test samples.
y_gender_test[:10]
Out[35]:
In [36]:
# Predict sigmoid scores for the whole test set, then show them rounded to 0/1
# and transposed so the output prints as a single row.
y_gender_pred = GenderModel.predict(X_test)
np.round(y_gender_pred).T
Out[36]:
Assessing the performance of the model¶
In [37]:
def plot(X,y_age,y_gender):
    """Show one image titled with its predicted gender and age.

    A ``y_gender`` score of at most 0.5 is reported as Male, anything above as
    Female. ``X`` is reshaped to 48x48 for display. This definition replaces the
    earlier two-argument ``plot`` helper of the same name.
    """
    if y_gender <= 0.5:
        title_prefix = 'Gender is Male and Age is around '
    else:
        title_prefix = 'Gender is Female and Age is around '
    plt.title(title_prefix + str(y_age))
    plt.imshow(X.reshape(48,48))
    plt.show()
In [38]:
# y_age_pred only covers X_test[:10], so n must stay below 10.
n = 2
# Both models end in a single-unit Dense layer, so predictions have shape
# (samples, 1). Index the single column explicitly: calling int() on a
# one-element array is deprecated since NumPy 1.25.
plot(X_test[n], int(y_age_pred[n, 0]), y_gender_pred[n, 0])
In [39]:
# y_age_pred only covers X_test[:10], so n must stay below 10.
n = 4
# Both models end in a single-unit Dense layer, so predictions have shape
# (samples, 1). Index the single column explicitly: calling int() on a
# one-element array is deprecated since NumPy 1.25.
plot(X_test[n], int(y_age_pred[n, 0]), y_gender_pred[n, 0])
In [40]:
# y_age_pred only covers X_test[:10], so n must stay below 10.
n = 9
# Both models end in a single-unit Dense layer, so predictions have shape
# (samples, 1). Index the single column explicitly: calling int() on a
# one-element array is deprecated since NumPy 1.25.
plot(X_test[n], int(y_age_pred[n, 0]), y_gender_pred[n, 0])
Compiling Model with DeepC¶
In [41]:
# Run the deepCC command-line compiler on the age-model checkpoint file
# (ageModel.h5 is written by the ModelCheckpoint callback during training).
!deepCC ageModel.h5
In [42]:
# Run the deepCC command-line compiler on the saved gender model file.
!deepCC GenderModel.h5