NOTE: This use case is not intended for resource-constrained devices.
In [1]:
# get the data file
!wget -N https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/train.csv
In [2]:
# import the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import LSTM, Dense, Bidirectional, Dropout, Activation, Flatten, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential,Model,load_model
from tensorflow.keras.optimizers import SGD
In [3]:
# read the csv file into a dataframe
df = pd.read_csv('train.csv')
df.head()
Out[3]:
In [4]:
# shuffle data
df = shuffle(df)
df.head()
Out[4]:
In [5]:
X_train = df["sentence"].fillna("fillna").values
y_train = df[["BookRestaurant", "GetWeather", "PlayMusic", "RateBook"]].values
In [6]:
X_train.shape, y_train.shape
Out[6]:
In [7]:
X_train[1], y_train[1]
Out[7]:
Preprocessing
In [8]:
text = X_train
In [9]:
# use a lowercase name so the Tokenizer class itself is not shadowed
tokenizer = Tokenizer()
In [10]:
# text preprocessing
tokenizer.fit_on_texts(text)
tokenizer_vocab_size = len(tokenizer.word_index) + 1  # +1 because index 0 is reserved for padding
tokenizer_vocab_size
Out[10]:
In [11]:
X_train.shape, y_train.shape
Out[11]:
In [12]:
# hold out the first 2500 samples for validation before reassigning X_train,
# otherwise the validation set would overlap the training set
samples = 2500
X_val = X_train[:samples]
y_val = y_train[:samples]
X_train = X_train[samples:]
y_train = y_train[samples:]
In [13]:
X_train.shape, y_train.shape
Out[13]:
In [14]:
X_train_encoded_words = tokenizer.texts_to_sequences(X_train)
X_val_encoded_words = tokenizer.texts_to_sequences(X_val)
In [15]:
X_train_encoded_padded_words = sequence.pad_sequences(X_train_encoded_words, maxlen = 100)
X_val_encoded_padded_words = sequence.pad_sequences(X_val_encoded_words, maxlen = 100)
X_train_encoded_padded_words.shape, X_val_encoded_padded_words.shape
Out[15]:
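As a quick aside, texts_to_sequences maps each word to its integer id from the fitted vocabulary, and pad_sequences left-pads (or truncates) every sequence to maxlen, so all inputs share the fixed length of 100 that the Embedding layer expects. A minimal sketch on a single made-up sentence (the sentence is illustrative; the resulting ids depend on the fitted vocabulary):

# illustrative only: encode one sentence and pad it to length 100
example = tokenizer.texts_to_sequences(["play some jazz music"])
padded = sequence.pad_sequences(example, maxlen = 100)  # zeros are prepended up to length 100
padded.shape  # (1, 100)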
In [16]:
X_val_encoded_padded_words, X_train_encoded_padded_words
Out[16]:
In [17]:
y_train.shape, y_val.shape
Out[17]:
Build and Train the Model
In [18]:
model = Sequential()
model.add(Embedding(tokenizer_vocab_size, 32, input_length = 100))
model.add(LSTM(10))
model.add(Dropout(0.5))
model.add(Dense(400, activation='relu'))
model.add(Dropout(0.5))
#model.add(Dense(200, activation='relu'))
#model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax'))
model.summary()
In [19]:
# lr and schedule_decay are legacy Keras arguments; tf.keras uses learning_rate and has no schedule_decay
nadam = tf.keras.optimizers.Nadam(learning_rate=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
model.compile(loss='categorical_crossentropy', optimizer=nadam, metrics=['accuracy'])
In [20]:
history = model.fit(X_train_encoded_padded_words,y_train, epochs = 3, batch_size=32, verbose=1, validation_data=(X_val_encoded_padded_words, y_val))
In [21]:
# save the model
model.save("Intent_Classification.h5")
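Saving in HDF5 format keeps the architecture, weights and optimizer state together, so the model can be reloaded later for inference without retraining. A minimal sketch using the load_model import from above:

# reload the saved model and confirm it scores the same on the validation set
restored = load_model("Intent_Classification.h5")
restored.evaluate(X_val_encoded_padded_words, y_val)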
Plots
In [22]:
# loss curves; matplotlib.pyplot was already imported as plt
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
In [23]:
# accuracy curves
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='lower right')
plt.show()
Loss and Accuracy
In [24]:
model.evaluate(X_val_encoded_padded_words, y_val)
Out[24]:
Predictions
In [25]:
def predict(text):
    # encode and pad the sentence exactly as during training
    tokens = tokenizer.texts_to_sequences([text])
    tokens = pad_sequences(tokens, maxlen = 100)
    prediction = model.predict(np.array(tokens))
    pred = np.argmax(prediction)
    classes = ['BookRestaurant', 'GetWeather', 'PlayMusic', 'RateBook']
    return classes[pred]
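Because the output layer is softmax, the prediction holds one probability per class and np.argmax picks the most likely one. A hedged variant (not in the original notebook) that also returns that probability as a confidence score:

# illustrative helper: return the top class together with its softmax probability
def predict_with_confidence(text):
    tokens = pad_sequences(tokenizer.texts_to_sequences([text]), maxlen = 100)
    probs = model.predict(np.array(tokens))[0]
    classes = ['BookRestaurant', 'GetWeather', 'PlayMusic', 'RateBook']
    idx = int(np.argmax(probs))
    return classes[idx], float(probs[idx])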
In [26]:
predict("is it raining ?")
Out[26]:
In [27]:
predict("i would like to book a table at hotel Orion for 29th june")
Out[27]:
In [28]:
predict("An Artist released a new music album ")
Out[28]:
In [29]:
predict("this novel deserves a rating of 10")
Out[29]: