ASL Training with gyroscope data¶
Credit: AITS Cainvas Community
Photo by Jay Mike Tee on Giphy
This notebook trains a simple deep learning model to recognize the hi 👋 and what's up 🤘 signs.
In [1]:
#!pip uninstall --yes matplotlib pandas torch
Setup¶
In [2]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
Upload Data¶
- If necessary, open the panel on the left side of Colab by clicking on the >
- Select the files tab in the left panel
- Upload the following dataset files from your computer: hi.csv and sup.csv (or fetch them programmatically as shown below)
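If you prefer to fetch the files programmatically instead of uploading them, a minimal sketch (using the same Cainvas S3 URLs referenced later in this notebook) is:
In [ ]:
import os
import urllib.request

# download the two gesture recordings if they are not already present
URLS = {
    "hi.csv": "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/hi.csv",
    "sup.csv": "https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/sup.csv",
}
for name, url in URLS.items():
    if not os.path.exists(name):
        urllib.request.urlretrieve(url, name)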
In [3]:
if not (os.path.exists("./hi.csv") and os.path.exists("./sup.csv")):
    print("Upload the gesture datasets hi.csv and sup.csv to the current directory.")
Graph Data (optional)¶
Plot the CSV data on two separate graphs, acceleration and gyroscope, because the two sensors use different units and scales.
In [4]:
def plot_gesture(urls, filenames, type="Acceleration"):
    for url, filename in zip(urls, filenames):
        df = pd.read_csv(url)
        index = range(1, len(df['aX']) + 1)
        fig = plt.figure(figsize=(16, 8))
        major_ticks = np.arange(-714, 5951, 119)
        minor_ticks = np.arange(-714, 5951, 7)
        ax = fig.add_subplot(1, 1, 1)
        ax.set_xticks(major_ticks)
        ax.set_xticks(minor_ticks, minor=True)
        ax.set_yticks(major_ticks)
        ax.set_yticks(minor_ticks, minor=True)
        ax.grid(which='minor', alpha=0.2)
        ax.grid(which='major', alpha=0.75)
        plt.title(type + " for \"" + os.path.splitext(filename)[0].upper() + "\" gesture")
        if type == "Acceleration":
            plt.plot(index, df['aX'], 'g.', label='x', linestyle='solid', marker=',')
            plt.plot(index, df['aY'], 'b.', label='y', linestyle='solid', marker=',')
            plt.plot(index, df['aZ'], 'r.', label='z', linestyle='solid', marker=',')
            plt.ylabel("Acceleration (G)")
        elif type == "Gyroscope":
            plt.plot(index, df['gX'], 'g.', label='x', linestyle='solid', marker=',')
            plt.plot(index, df['gY'], 'b.', label='y', linestyle='solid', marker=',')
            plt.plot(index, df['gZ'], 'r.', label='z', linestyle='solid', marker=',')
            plt.ylabel("Gyroscope (deg/sec)")
        plt.xlabel("Sample #")
        plt.legend()
        plt.show()
In [ ]:
plot_gesture(["https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/hi.csv",
"https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/sup.csv"],
["hi.csv", "sup.csv"],
"Acceleration")
In [ ]:
plot_gesture(["https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/hi.csv",
"https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/sup.csv"],
["hi.csv", "sup.csv"],
"Gyroscope")
Train Neural Network¶
Parse and prepare the data¶
Parse the CSV files and transform them into a format that can be used to train the fully connected neural network.
If you've recorded additional gestures, update the GESTURES
list with the names of the additional CSV files.
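For example, a hypothetical third gesture recorded to yo.csv (a placeholder file name, not part of this dataset) would be added by extending the list and the CSV-reading branch in the next cell:

GESTURES = [
    "hi",
    "sup",
    "yo"    # placeholder for an additional recorded gesture
]
# ...and in the loop below:
# elif gesture == "yo":
#     df = pd.read_csv("./yo.csv")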
In [ ]:
# Set a fixed random seed for reproducibility, so the notebook produces
# the same sequence of random numbers on every run
SEED = 1337
np.random.seed(SEED)
cuda = torch.cuda.is_available()
torch.manual_seed(SEED)
if cuda:
    torch.cuda.manual_seed(SEED)
# the list of gestures
GESTURES = [
"hi",
"sup"
]
SAMPLES_PER_GESTURE = 119
NUM_GESTURES = len(GESTURES)
# one-hot encoding matrix (defined for reference; the PyTorch training below
# uses integer class indices with CrossEntropyLoss rather than one-hot targets)
ONE_HOT_ENCODED_GESTURES = np.eye(NUM_GESTURES)
inputs = []
outputs = []
# read each csv file and push an input and output
for gesture_index in range(NUM_GESTURES):
    gesture = GESTURES[gesture_index]
    print(f"Processing index {gesture_index} for gesture '{gesture}'.")
    output = gesture_index
    if gesture == "hi":
        df = pd.read_csv("https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/hi.csv")
    elif gesture == "sup":
        df = pd.read_csv("https://cainvas-static.s3.amazonaws.com/media/user_data/cainvas-admin/sup.csv")
    # get rid of pesky empty value lines of csv which cause NaN inputs
    df = df.dropna()
    df = df.reset_index(drop=True)
    # calculate the number of gesture recordings in the file
    num_recordings = int(df.shape[0] / SAMPLES_PER_GESTURE)
    print(f"\tThere are {df.shape[0]}/{SAMPLES_PER_GESTURE} = {num_recordings} recordings of the {gesture} gesture.")
    for i in range(num_recordings):
        tensor = []
        for j in range(SAMPLES_PER_GESTURE):
            index = i * SAMPLES_PER_GESTURE + j
            # normalize the input data to between 0 and 1:
            # - acceleration is between: -4 and +4
            # - gyroscope is between: -2000 and +2000
            tensor += [
                (df['aX'][index] + 4) / 8,
                (df['aY'][index] + 4) / 8,
                (df['aZ'][index] + 4) / 8,
                (df['gX'][index] + 2000) / 4000,
                (df['gY'][index] + 2000) / 4000,
                (df['gZ'][index] + 2000) / 4000
            ]
        inputs.append(tensor)
        outputs.append(output)
# convert the list to numpy array
inputs = np.array(inputs)
outputs = np.array(outputs)
print("Data set parsing and preparation complete.")
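As a quick sanity check (assuming the cell above has run), each flattened recording should contain 119 samples × 6 values = 714 inputs, which is the input size the model uses later:
In [ ]:
# each flattened recording holds SAMPLES_PER_GESTURE * 6 = 714 normalized values
print("inputs shape:", inputs.shape)
print("outputs shape:", outputs.shape)
assert inputs.shape[1] == SAMPLES_PER_GESTURE * 6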
Randomize and split the input and output pairs for training¶
Randomly split the input and output pairs into two sets: 80% for training and 20% for testing.
- the training set is used to train the model
- the testing set is used to test the model after training
With this 80/20 split, the validation slice produced by np.split below is empty; a validation set would otherwise be used to measure how well the model performs during training.
In [ ]:
# Randomize the order of the inputs, so they can be evenly distributed for training, testing, and validation
# https://stackoverflow.com/a/37710486/2020087
num_inputs = len(inputs)
randomize = np.arange(num_inputs)
np.random.shuffle(randomize)
# Swap the consecutive indexes (0, 1, 2, etc) with the randomized indexes
inputs = inputs[randomize]
outputs = outputs[randomize]
# Split the recordings (group of samples) into three sets: training, testing and validation
TRAIN_SPLIT = int(0.8 * num_inputs)
TEST_SPLIT = int(0.2 * num_inputs + TRAIN_SPLIT)
inputs_train, inputs_test, inputs_validate = np.split(inputs, [TRAIN_SPLIT, TEST_SPLIT])
outputs_train, outputs_test, outputs_validate = np.split(outputs, [TRAIN_SPLIT, TEST_SPLIT])
# if the data set is small (fewer than 1000 training recordings), train on the entire data set
if inputs_train.shape[0] < 1000:
    inputs_train = inputs
    outputs_train = outputs
INPUT_LEN = inputs_train.shape[1]
print("Data set randomization and splitting complete.")
Build the Model¶
In [ ]:
class aslModel(nn.Module):
    def __init__(self):
        super(aslModel, self).__init__()
        self.fc1 = nn.Linear(INPUT_LEN, 128)
        self.fc2 = nn.Linear(128, 16)
        self.fc3 = nn.Linear(16, NUM_GESTURES)

    def forward(self, x):
        x = x.view((-1, INPUT_LEN))
        h = F.relu(self.fc1(x))
        h = F.relu(self.fc2(h))
        # return raw logits; nn.CrossEntropyLoss applies log-softmax internally
        h = self.fc3(h)
        return h
class aslTinyModel(nn.Module):
    def __init__(self):
        super(aslTinyModel, self).__init__()
        # Arduino fit 714 -> 10 -> 4 -> 2
        self.fc1 = nn.Linear(INPUT_LEN, 8)
        self.fc2 = nn.Linear(8, 4)
        self.fc3 = nn.Linear(4, NUM_GESTURES)

    def forward(self, x):
        x = x.view((-1, INPUT_LEN))
        h = F.relu(self.fc1(x))
        h = F.relu(self.fc2(h))
        # return raw logits; nn.CrossEntropyLoss applies log-softmax internally
        h = self.fc3(h)
        return h
        #return F.log_softmax(h, dim=1)
model = aslTinyModel()
if cuda:
    model.cuda() # CUDA!
#optimizer = optim.Adam(model.parameters(), lr=1e-3)
optimizer = optim.SGD(model.parameters(), lr=5e-4, momentum=0.9)
criterion = nn.CrossEntropyLoss()
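Since the target is a Cortex-M4 class device, it can be worth checking how small aslTinyModel actually is; a minimal sketch (assuming the cell above has run):
In [ ]:
# count trainable parameters: 714*8 + 8 + 8*4 + 4 + 4*2 + 2 = 5766 for the sizes above
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"aslTinyModel has {num_params} trainable parameters")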
Train the Model¶
In [ ]:
EPOCHS = 150
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, target) in enumerate(zip(inputs_train, np.expand_dims(outputs_train, axis=1))):
        # Get Samples
        data = torch.from_numpy(np.array(data, dtype=np.float32))
        target = torch.from_numpy(np.array(target, dtype=np.int64))
        if cuda:
            data, target = data.cuda(), target.cuda()
        # Init
        optimizer.zero_grad()
        # Predict
        y_pred = model(data)
        # Calculate loss
        loss = criterion(y_pred, target)  # equivalent to F.cross_entropy(y_pred, target)
        running_loss += loss.item()
        # Backpropagation
        loss.backward()
        optimizer.step()
        # Display training accuracy at the end of each epoch
        if batch_idx == inputs_train.shape[0] - 1:
            model.eval()
            epoch_x = torch.from_numpy(np.array(inputs_train, dtype=np.float32))
            epoch_y = torch.from_numpy(outputs_train)
            if cuda:
                epoch_x, epoch_y = epoch_x.cuda(), epoch_y.cuda()
            output = model(epoch_x)
            pred = output.data.max(1)[1]
            d = pred.eq(epoch_y).cpu()
            accuracy = d.sum().item() / d.size().numel()
            print('\rTrain Epoch: {}/{} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {}/{}={:.1f}%'.format(
                epoch + 1,
                EPOCHS,
                batch_idx + 1,
                inputs_train.shape[0],
                100. * (batch_idx + 1) / inputs_train.shape[0],
                running_loss / inputs_train.shape[0],
                d.sum().item(), d.size().numel(), accuracy * 100),
                end='')
# for name, param in model.named_parameters():
# if param.requires_grad and name=="fc3.weight":
# print ("\nparameter: ", name, model.fc3.weight.grad)
Evaluate¶
In [ ]:
# Evaluate on the held-out test set
evaluate_x = torch.from_numpy(np.array(inputs_test, dtype=np.float32))
evaluate_y = torch.from_numpy(outputs_test)
if cuda:
    evaluate_x, evaluate_y = evaluate_x.cuda(), evaluate_y.cuda()
model.eval()
output = model(evaluate_x)
pred = output.data.max(1)[1]
#print(pred)
d = pred.eq(evaluate_y.data).cpu()
accuracy = d.sum().item() / d.size().numel()
print('\t Test Accuracy: {:.4f}%'.format(accuracy * 100))
Verify¶
Graph the model's predictions against the actual labels for the test recordings.
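The notebook does not include a plotting cell here; one possible sketch (assuming the training and evaluation cells above have run, and using the test split) is:
In [ ]:
# plot predicted vs. actual class indices for the test recordings
model.eval()
test_x = torch.from_numpy(np.array(inputs_test, dtype=np.float32))
if cuda:
    test_x = test_x.cuda()
predictions = model(test_x).data.max(1)[1].cpu().numpy()

plt.figure(figsize=(16, 4))
plt.plot(outputs_test, 'bo', label='actual')
plt.plot(predictions, 'r.', label='predicted')
plt.yticks(range(NUM_GESTURES), GESTURES)
plt.xlabel("Test recording #")
plt.legend()
plt.show()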
Save model¶
In [ ]:
# export the trained model to ONNX; the dummy input must be on the same device as the model
dummy_input = torch.randn(INPUT_LEN)
if cuda:
    dummy_input = dummy_input.cuda()
torch.onnx.export(model, (dummy_input), "./asl_model.onnx", verbose=True)
Validate ONNX model¶
In [ ]:
import onnx
# load the exported model and check that it is well formed
onnx_model = onnx.load_model("./asl_model.onnx")
onnx.checker.check_model(onnx_model)
#print(onnx_model)
ONNX Model Inference Validation¶
In [ ]:
#install caffe2
!pip install --user --no-cache-dir -r https://raw.githubusercontent.com/pytorch/pytorch/master/requirements.txt
import caffe2.python.onnx.backend as backend

# prepare the caffe2 backend once, then run every training recording through the ONNX model
rep = backend.prepare(onnx_model, device="CPU")
gesture_text = ('hi', 'sup')
for gesture, lbl in zip(inputs_train, outputs_train):
    gesture = gesture.astype(np.float32)
    outputs = rep.run(gesture)
    print('predicted : ', gesture_text[int(outputs[0][0][1] >= outputs[0][0][0])], '\tactual : ', gesture_text[lbl])
install deepC¶
compile ML model for Cortex M4¶
In [ ]:
!deepCC asl_model.onnx
Generate C++ ML model code¶
In [ ]:
from cainvas import cpp2ipynb
cpp2ipynb("./asl_model_deepC/asl_model.cpp")