In [1]:
# NOTE(review): the `wget` Python package is installed here but never imported;
# the dataset is actually downloaded with the shell `wget` binary (`!wget`) below,
# so this install may be unnecessary — confirm before removing.
pip install wget
Importing necessary libraries¶
In [2]:
# Core numerics / dataframes / plotting, plus scikit-learn scaling and Keras.
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
import tensorflow as tf
from tensorflow.keras.models import load_model
%matplotlib inline
# Set random seed so the row shuffle below is reproducible — pandas falls back
# to NumPy's global RNG when no random_state is passed to .sample().
np.random.seed(0)
Importing dataset¶
In [3]:
# Dataset of 1990s tracks with audio features and a binary `target` label.
url = "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/dataset-of-90s.csv"
# Also fetch a local copy via the shell `wget` binary (not the pip package above).
!wget https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/dataset-of-90s.csv
In [4]:
# Read the CSV straight from the URL; the bare last expression shows the head.
data = pd.read_csv(url)
data.head()
Out[4]:
Preprocessing¶
In [5]:
# Class balance of the binary label.
data.target.value_counts()
Out[5]:
In [6]:
# Shuffle all rows (frac=1 keeps every row, in random order). This is only
# reproducible because np.random.seed(0) was set in the imports cell — pandas
# uses NumPy's global RNG when no random_state is given.
data = data.sample(frac=1)
data.head()
Out[6]:
In [7]:
# Column dtypes and non-null counts.
data.info()
In [8]:
# Drop free-text identifier columns (no predictive audio features).
# Fix: avoid inplace=True — reassignment produces the same frame but keeps the
# cell idempotent-friendly and is the recommended pandas style.
data = data.drop(columns=["track", "artist", "uri"])
In [9]:
# Features are every column except the last; the label is the final column,
# kept as a one-column DataFrame (not a Series).
unscaled_inputs = data.iloc[:, :-1]
target = data.iloc[:, -1:]
In [10]:
# Standardize every feature to zero mean / unit variance.
# NOTE(review): scaling is fit on the FULL dataset before the train/val/test
# split below, so validation/test statistics leak into training — confirm this
# is acceptable, or fit a StandardScaler on the training split only.
scaled_inputs = preprocessing.scale(unscaled_inputs)
scaled_inputs
Out[10]:
In [11]:
# Total number of rows available for splitting.
samples_count = scaled_inputs.shape[0]
samples_count
Out[11]:
In [12]:
# 80 / 10 / 10 split; the test share takes the rounding remainder so the three
# counts always sum to samples_count exactly.
train_samples_count = int(samples_count * 0.8)
validation_samples_count = int(samples_count * 0.1)
test_samples_count = samples_count - (train_samples_count + validation_samples_count)
In [13]:
# train: first 80% of the shuffled, scaled rows (and matching labels).
train_inputs = scaled_inputs[:train_samples_count]
train_targets = target[:train_samples_count]
In [14]:
# validation: next 10% slice, immediately after the training rows.
validation_inputs = scaled_inputs[train_samples_count:train_samples_count+validation_samples_count]
validation_targets = target[train_samples_count:train_samples_count+validation_samples_count]
In [15]:
# test: all remaining rows after the train + validation slices.
test_inputs = scaled_inputs[train_samples_count+validation_samples_count:]
test_targets = target[train_samples_count+validation_samples_count:]
In [16]:
# Sanity check: positive (1) label count, split size, and positive proportion
# for each split — the proportions should be similar across splits because the
# rows were shuffled before slicing.
print(np.sum(train_targets), train_samples_count, np.sum(train_targets) / train_samples_count)
print(np.sum(validation_targets), validation_samples_count, np.sum(validation_targets) / validation_samples_count)
print(np.sum(test_targets), test_samples_count, np.sum(test_targets) / test_samples_count)
Saving preprocessed data¶
In [17]:
# Persist each split as an .npz archive with named arrays 'inputs' / 'targets'.
np.savez('Spotify_data_train', inputs=train_inputs, targets=train_targets)
np.savez('Spotify_data_validation', inputs=validation_inputs, targets=validation_targets)
np.savez('Spotify_data_test', inputs=test_inputs, targets=test_targets)
In [18]:
# Reload the saved splits, casting features to float and labels to int.
# Fix: np.float / np.int were deprecated in NumPy 1.20 and removed in 1.24 —
# they now raise AttributeError. Use explicit fixed-width dtypes instead.
npz = np.load('Spotify_data_train.npz')
train_inputs, train_targets = npz['inputs'].astype(np.float64), npz['targets'].astype(np.int64)
npz = np.load('Spotify_data_validation.npz')
validation_inputs, validation_targets = npz['inputs'].astype(np.float64), npz['targets'].astype(np.int64)
npz = np.load('Spotify_data_test.npz')
test_inputs, test_targets = npz['inputs'].astype(np.float64), npz['targets'].astype(np.int64)
Model¶
In [19]:
input_size = 15          # number of feature columns after dropping track/artist/uri
hidden_layer_size = 50

model = tf.keras.Sequential([
    # Fix: `input_size` was defined but never used. Declaring the input shape
    # builds the model immediately (so model.summary() works before fit()) and
    # makes the expected feature count explicit. Weights are unchanged.
    tf.keras.layers.Input(shape=(input_size,)),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    # NOTE(review): a 2-unit ReLU bottleneck just before the sigmoid output is
    # unusual for binary classification — confirm it is intentional.
    tf.keras.layers.Dense(2, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
In [20]:
# Binary classification: sigmoid output + binary cross-entropy, Adam optimizer.
model.compile(optimizer="adam", loss='binary_crossentropy', metrics=['accuracy'])
In [21]:
# Train for a fixed 20 epochs, scoring the validation split after each epoch
# (verbose=2 prints one summary line per epoch).
batch_size = 300
max_epochs = 20
history = model.fit(train_inputs,
train_targets,
batch_size=batch_size,
epochs=max_epochs,
validation_data=(validation_inputs, validation_targets),
verbose = 2)
In [22]:
# Layer-by-layer shapes and parameter counts.
model.summary()
Saving the model¶
In [23]:
# Save the trained model (architecture + weights) in HDF5 format.
model.save("song.h5")
In [24]:
# Uncomment to reload the saved model (note: the file saved above is "song.h5").
#model = load_model("song.h5")
Analysis¶
In [25]:
# Compare first/last-epoch losses and plot both curves.
print(f"Training loss at epoch 1: {history.history['loss'][0]}")
print(f"Training loss at epoch 20: {history.history['loss'][19]}")
# Fix: the two lines below previously re-printed the *training* loss ('loss');
# validation loss is stored under the 'val_loss' key.
print(f"Validation loss at epoch 1: {history.history['val_loss'][0]}")
print(f"Validation loss at epoch 20: {history.history['val_loss'][19]}")
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.legend(['Training loss', 'Validation loss'])
plt.xlabel('Epoch')
plt.ylabel('Loss')
Out[25]:
In [26]:
# Compare first/last-epoch accuracies and plot both curves.
print(f"Training accuracy at epoch 1: {history.history['accuracy'][0]}")
print(f"Training accuracy at epoch 20: {history.history['accuracy'][19]}")
# Fix: the first validation line was mislabeled "epoch 20" while printing
# index 0 — it is the epoch-1 value.
print(f"Validation accuracy at epoch 1: {history.history['val_accuracy'][0]}")
print(f"Validation accuracy at epoch 20: {history.history['val_accuracy'][19]}")
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.legend(['Training accuracy', 'Validation accuracy'])
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
Out[26]:
Prediction¶
In [27]:
# Sigmoid probabilities for the test set (one value per sample, from the
# final Dense(1, sigmoid) layer).
pred = model.predict(test_inputs)
#pred
In [28]:
"""for i in pred:
if i >= 0.6:
print("Hit")
else:
print("Not a Hit")"""
Out[28]:
Evaluation¶
In [29]:
# Final held-out evaluation on the test split.
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets, verbose = 0)
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))
DeepCC¶
In [30]:
# Compile the saved Keras model with the deepCC compiler (Cainvas toolchain).
!deepCC song.h5