Speech Emotion Recognition is the task of recognizing the emotion expressed in speech. It has applications such as song recommendation based on a listener's mood, as well as other applications in which a person's mood plays a vital role.¶
In [1]:
# Download the SAVEE speech-emotion dataset archive and extract it in place.
# After this cell runs you will see a folder called ALL in your workspace
# containing the .wav recordings; the zip itself is removed to save space.
!wget -N "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/SER.zip"
!unzip -qo SER.zip
!rm SER.zip
Importing Libraries¶
In [2]:
import pandas as pd
import numpy as np
import os
import sys
# librosa is a Python library for analyzing audio and music; it is used below
# to load the .wav files and extract features (STFT, dB conversion, plotting).
import librosa
import librosa.display
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
# to play the audio files inline in the notebook
from IPython.display import Audio
import tensorflow as tf
import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization ,Activation
# NOTE(review): `np_utils` was removed from keras.utils in TF2-era Keras
# (use `to_categorical` directly) — confirm the installed Keras version.
from keras.utils import np_utils, to_categorical
from keras.callbacks import ModelCheckpoint
import warnings
# Silence library warnings (e.g. deprecation notices) unless the user has
# explicitly requested them via the interpreter's -W flag.
if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)
In [3]:
Savee = "ALL/"
In [4]:
# Map the SAVEE filename emotion codes to human-readable labels.
# Filenames look like "DC_a01.wav": the part after '_' starts with a one- or
# two-letter code ('a', 'd', 'f', 'h', 'n', 'sa'); anything else ('su') is
# treated as surprise.
emotion_codes = {
    'a': 'angry',
    'd': 'disgust',
    'f': 'fear',
    'h': 'happy',
    'n': 'neutral',
    'sa': 'sad',
}

file_emotion = []
file_path = []
for fname in os.listdir(Savee):
    file_path.append(Savee + fname)
    # Strip the speaker prefix, then drop the trailing "NN.wav" (6 chars)
    # to isolate the emotion code.
    code = fname.split('_')[1][:-6]
    file_emotion.append(emotion_codes.get(code, 'surprise'))

# Combine labels and paths into a single lookup table.
data_path = pd.DataFrame({'Emotions': file_emotion, 'Path': file_path})
data_path.head()
Out[4]:
Data Visualisation and Exploration¶
First, let's plot the count of each emotion in our dataset.
In [5]:
plt.title('Count of Emotions', size=16)
# seaborn deprecated positional data arguments in 0.12 and removed them in
# 0.13 — pass the Series by keyword so the cell works on current versions.
sns.countplot(x=data_path.Emotions)
plt.ylabel('Count', size=12)
plt.xlabel('Emotions', size=12)
sns.despine(top=True, right=True, left=False, bottom=False)
plt.show()
We can also plot waveplots and spectrograms for the audio signals.
- Waveplots - Waveplots show the loudness of the audio at a given time.
- Spectrograms - A spectrogram is a visual representation of the spectrum of frequencies of sound or other signals as they vary with time. It is a representation of frequencies changing with respect to time for a given audio/music signal.
In [6]:
def create_waveplot(data, sr, e):
    """Plot the waveform (amplitude over time) of an audio signal.

    Parameters
    ----------
    data : np.ndarray
        Audio time series, as returned by ``librosa.load``.
    sr : int
        Sampling rate of ``data``.
    e : str
        Emotion label; used only in the figure title.
    """
    plt.figure(figsize=(10, 3))
    plt.title('Waveplot for audio with {} emotion'.format(e), size=15)
    # librosa renamed waveplot -> waveshow in 0.10 and removed the old name;
    # dispatch on what the installed version provides so the cell runs on both.
    if hasattr(librosa.display, 'waveshow'):
        librosa.display.waveshow(data, sr=sr)
    else:
        librosa.display.waveplot(data, sr=sr)
    plt.show()
def create_spectrogram(data, sr, e):
    """Plot a linear-frequency spectrogram of an audio signal.

    The signal is converted to a short-time Fourier transform, its magnitude
    is mapped to decibels, and the result is shown as time vs. frequency.

    Parameters
    ----------
    data : np.ndarray
        Audio time series, as returned by ``librosa.load``.
    sr : int
        Sampling rate of ``data``.
    e : str
        Emotion label; used only in the figure title.
    """
    # Short-time Fourier transform: time-domain signal -> complex spectrum.
    stft_matrix = librosa.stft(data)
    # Magnitude in decibels is far easier to read than raw amplitude.
    db_matrix = librosa.amplitude_to_db(abs(stft_matrix))
    plt.figure(figsize=(12, 3))
    plt.title('Spectrogram for audio with {} emotion'.format(e), size=15)
    librosa.display.specshow(db_matrix, sr=sr, x_axis='time', y_axis='hz')
    plt.colorbar()
In [7]:
emotion = 'fear'
# Pick the second recording labelled with this emotion (index 1) to inspect.
emotion_paths = data_path.Path[data_path.Emotions == emotion].to_numpy()
path = emotion_paths[1]
data, sampling_rate = librosa.load(path)
create_waveplot(data, sampling_rate, emotion)
create_spectrogram(data, sampling_rate, emotion)
Audio(path)
Out[7]:
In [8]:
emotion = 'angry'
# Pick the second recording labelled with this emotion (index 1) to inspect.
emotion_paths = data_path.Path[data_path.Emotions == emotion].to_numpy()
path = emotion_paths[1]
data, sampling_rate = librosa.load(path)
create_waveplot(data, sampling_rate, emotion)
create_spectrogram(data, sampling_rate, emotion)
Audio(path)
Out[8]: