Commit 0d06c54: "Add files via upload"
1 parent e0dd8d2
File tree: 7 files changed, +449 -2 lines

README.md (+37 -2)
@@ -1,2 +1,37 @@
-# HumanActivity-Recogniser
-Classifying the physical activities performed by a user based on accelerometer and gyroscope sensor data collected by a smartphone in the user’s pocket. The activities to be classified are: Standing, Sitting, StairsUp, StairsDown, Walking and Cycling.
# Human-Activity-Recognition

## Dataset link
https://archive.ics.uci.edu/ml/datasets/Heterogeneity+Activity+Recognition

## Libraries used
Keras, Scikit-Learn, NumPy, Matplotlib and Pandas

## File structure
There are 9 main files: 4 for data management, 4 for machine-learning code and 1 for plotting the results.
### Data management files
1. As the dataset is very large (~1.4 GB), it was partitioned into 13 files, and the scripts 'compress_file.py' and 'compress2.0.py' were used to downsample the data in those 13 files into 13 compressed files.
2. The scripts 'merge.py' and 'merge2.0.py' merge the compressed files to obtain the dataset used for training. The 2.0 scripts were used for merging the accelerometer and gyroscope data. (A minimal sketch of this pipeline is shown after this list.)
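A minimal sketch of what this downsample-and-merge stage might look like, assuming the scripts simply decimate rows with pandas (mirroring the `data.iloc[::10, :]` step in main.py). The directory names, file pattern and decimation factor here are assumptions, since the compression scripts themselves are not part of this commit:

```python
import glob
import pandas as pd

# Hypothetical downsampling step: keep every 10th row of each partition,
# mirroring the decimation used in main.py.
for i, path in enumerate(sorted(glob.glob('Partitioned_data/part_*.csv'))):
    part = pd.read_csv(path)
    part.iloc[::10, :].to_csv(f'Compressed_data/compressed_{i}.csv', index=False)

# Hypothetical merge step: concatenate the compressed partitions row-wise
# into the single training file loaded by main_NN.py / main_RNN.py.
frames = [pd.read_csv(p) for p in sorted(glob.glob('Compressed_data/compressed_*.csv'))]
pd.concat(frames, ignore_index=True).to_csv('Compressed_data/adata.csv', index=False)
```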
### Machine-learning code
1. "main_NN.py" contains the neural-network implementation, which was applied to the accelerometer and gyroscope data separately.
2. "main_RNN.py" contains the LSTM implementation, which was used on the merged data as well as on the accelerometer and gyroscope data separately.
3. "main.py" takes in the complete dataset (not the compressed dataset) and implements the LSTM.
4. "trainingPreprocessedData.py" takes in the dataset at https://archive.ics.uci.edu/ml/datasets/Smartphone-Based+Recognition+of+Human+Activities+and+Postural+Transitions and outputs the result. This file was mainly created to check whether our LSTM model was good enough; the accuracy obtained on this preprocessed dataset was 91%.
### Plotting
Used for plotting the results obtained from "main_NN.py".
## Model
"model.h5" stores the final model for the problem; a hedged example of loading it is shown below.
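A minimal sketch of loading the saved model for inference, assuming a Keras version compatible with the one used for training. The (samples, timesteps=1, features) input shape follows the reshaping done in main.py, and the zero-filled sample is only a placeholder:

```python
import numpy as np
from keras.models import load_model

model = load_model('model.h5')  # the trained LSTM stored in this commit

# Placeholder input shaped (samples, timesteps, features), as in main.py.
sample = np.zeros((1, 1, model.input_shape[-1]))
probs = model.predict(sample)   # per-class scores for the 6 activities
print(probs.argmax(axis=-1))    # index of the predicted activity
```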
## NOTE
The final code is run by 'main.py'; the dataset must be in the same folder as the script, which should be run with python3. A quick pre-run check is sketched below.
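A hedged sketch of verifying the setup before running main.py; the two file names match the pd.read_csv calls in main.py:

```python
import os

# main.py reads these two CSVs from its own folder.
required = ['Phones_accelerometer.csv', 'Phones_gyroscope.csv']
missing = [f for f in required if not os.path.exists(f)]
if missing:
    raise SystemExit("Missing %s; place them here, then run: python3 main.py" % missing)
```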

main.py (+102)
@@ -0,0 +1,102 @@
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM

start_time = time.time()  # To keep track of the time taken to run the code

print('Loading data ...')
data1 = pd.read_csv('Phones_accelerometer.csv')  # Loading accelerometer data
data2 = pd.read_csv('Phones_gyroscope.csv')      # Loading gyroscope data

length = min(len(data1), len(data2))  # Truncate both to the shorter length so they can be merged row-wise

data1 = data1.drop(labels=['Arrival_Time', 'Creation_Time', 'Index', 'User'], axis=1)  # Dropping unnecessary fields
data2 = data2.drop(labels=['Arrival_Time', 'Creation_Time', 'Index', 'User', 'Model', 'Device'], axis=1)

data1 = data1.head(length)  # Taking only the top 'length' entries from both frames
data2 = data2.head(length)

data2.columns = ['x1', 'y1', 'z1', 'gt1']  # Renaming data2's columns, since data1 already uses 'x', 'y' and 'z'
data = pd.concat([data1, data2], axis=1)   # Merging the accelerometer and gyroscope data

to_drop = ['null']  # Dropping rows labelled 'null' in either data1 or data2
data = data[~data['gt'].isin(to_drop)]
data = data[~data['gt1'].isin(to_drop)]

data = data.drop(labels=['gt1'], axis=1)  # Keep a single label column

data = data.iloc[::10, :]  # Downsample: keep every 10th row

cols_to_norm = ['x', 'y', 'z', 'x1', 'y1', 'z1']  # Mean-normalizing the sensor columns
data[cols_to_norm] = data[cols_to_norm].apply(lambda x: (x - x.mean()) / (x.max() - x.min()))

y = data[['gt']]  # Extracting only the activity labels
data = data.drop(labels=['gt'], axis=1)
data = pd.get_dummies(data)      # One-hot encoding the remaining categorical columns
parameters1 = len(data.columns)  # Number of input features

y = pd.get_dummies(y)            # One-hot encoding the labels
parameters2 = len(y.columns)     # Number of activity classes (6)

data = np.array(data)
y = np.array(y)

m = len(data)
crossval = round(3 * m / 4)  # 75% of the data for training, the remaining 25% for testing
train_data = data[0:crossval, :]
train_data_y = y[0:crossval, :]

test_data = data[crossval:, :]
test_data_y = y[crossval:, :]

# Reshaping into the (samples, timesteps, features) form required by the LSTM
X_train = np.reshape(train_data, (train_data.shape[0], 1, train_data.shape[1]))
y_train = train_data_y

X_test = np.reshape(test_data, (test_data.shape[0], 1, test_data.shape[1]))
y_test = test_data_y

np.random.seed(7)

# Building the LSTM model
model = Sequential()
model.add(LSTM(24, input_shape=(1, parameters1), return_sequences=True))
model.add(LSTM(12))
model.add(Dense(parameters2, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print(model.summary())

Accuracy_Arr = np.empty(0)
batch_size_Arr = np.empty(0)

# Fitting the data with batch sizes 2**5, 2**6 and 2**7
max_batch_power = 8
for i in range(5, max_batch_power):
    model.fit(X_train, y_train, epochs=3, batch_size=pow(2, i))
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Accuracy: %.2f%%" % (scores[1] * 100))
    Accuracy_Arr = np.append(Accuracy_Arr, scores[1] * 100)
    batch_size_Arr = np.append(batch_size_Arr, pow(2, i))

fig = plt.figure()  # Plot of accuracy vs batch size
plt.plot(batch_size_Arr, Accuracy_Arr)
plt.xlabel("Batch size")
plt.ylabel("Accuracy of model (in %)")
fig.savefig("Merged_data_Accuracy_vs_batch_size.png")  # Save before show(), which clears the figure
plt.show()

model.save("my_model.h5")  # Saving the model

main_NN.py (+119)
@@ -0,0 +1,119 @@
import os
import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, f1_score


#################################### Import Data ##########################################
def import_data(file):
    file_dir = os.path.dirname(__file__)
    file_path = os.path.join(file_dir, 'Compressed_data/' + file)  # Make sure the dataset is in this folder
    train_data = np.loadtxt(file_path, dtype=float, delimiter=',', skiprows=1)
    return train_data


############################## Neural Network Implementation ##############################
def NeuralNetworkTrain(X_train, y_train, X_test, y_test):
    train_scores = np.empty(0)
    test_scores = np.empty(0)
    indices = np.empty(0)
    for i in (5, 10, 15):  # Three hidden layers of size i each; try different depths as well
        print("hidden layer size: ", i, "\n")
        mlp = MLPClassifier(hidden_layer_sizes=(i, i, i), early_stopping=True,
                            learning_rate='adaptive', learning_rate_init=0.003)
        mlp.fit(X_train, y_train)

        predictions_train = mlp.predict(X_train)
        print("Fit on train data for size ", i, ":\n", classification_report(y_train, predictions_train))

        predictions_test = mlp.predict(X_test)
        print("Fit on test data for size ", i, ":\n", classification_report(y_test, predictions_test))

        train_scores = np.append(train_scores, f1_score(y_train, predictions_train, average='macro'))
        test_scores = np.append(test_scores, f1_score(y_test, predictions_test, average='macro'))
        indices = np.append(indices, i)

    plt.plot(indices, train_scores)
    plt.plot(indices, test_scores)
    plt.legend(['Train scores', 'Test scores'], loc='upper left')
    plt.show()


############################## Random Forest Implementation ###############################
def RandomForestTrain(X_train, y_train, X_test, y_test):
    rf = RandomForestClassifier(n_estimators=100)
    rf.fit(X_train, y_train)

    predictions_train = rf.predict(X_train)
    print("Fit on train data:\n", classification_report(y_train, predictions_train))

    predictions_test = rf.predict(X_test)
    print("Fit on test data:\n", classification_report(y_test, predictions_test))


start_time = time.time()  # Tracking the execution time

data = import_data('gdata.csv')  # Importing the gyroscope data
cross_val = 1060904  # Row after which the data of the last user is recorded (used for cross-validation)
m = 1060904          # Training on the data of two users only; try all 8 users as well (m = 993720 for adata, m = 1060904 for gdata)
train_data = data[0:m, :]
test_data = data[cross_val:, :]

# Setting up the feature matrix and output vector
parameters = len(train_data[0, :]) - 2  # Number of feature columns (first column is an index, last is the label)

X_train = train_data[:, 1:parameters + 1]
y_train = train_data[:, parameters + 1]

X_test = test_data[:, 1:parameters + 1]
y_test = test_data[:, parameters + 1]

NeuralNetworkTrain(X_train, y_train, X_test, y_test)

data = import_data('adata.csv')  # Importing the accelerometer data
cross_val = 993720
m = 993720
train_data = data[0:m, :]
test_data = data[cross_val:, :]

parameters = len(train_data[0, :]) - 2

X_train = train_data[:, 1:parameters + 1]
y_train = train_data[:, parameters + 1]

X_test = test_data[:, 1:parameters + 1]
y_test = test_data[:, parameters + 1]

NeuralNetworkTrain(X_train, y_train, X_test, y_test)
print("time elapsed: ", format(time.time() - start_time))  # ~6-7 minutes on the entire dataset

# RandomForestTrain(X_train, y_train, X_test, y_test)  # Much slower; can take around half an hour
# print("time elapsed: ", format(time.time() - start_time))

main_RNN.py (+88)
@@ -0,0 +1,88 @@
import os
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout


#################################### Import Data ##########################################
def import_data(file):
    file_dir = os.path.dirname(__file__)
    file_path = os.path.join(file_dir, 'Compressed_data/' + file)  # Make sure the dataset is in this folder
    train_data = np.loadtxt(file_path, dtype=float, delimiter=',', skiprows=1)
    return train_data


def normalize(data_vector):
    # Rescale a column to the range [-1, 1]
    max_data = np.amax(data_vector)
    min_data = np.amin(data_vector)
    return -1 + 2 * (data_vector - min_data) / (max_data - min_data)


start_time = time.time()  # Tracking the execution time

print('Loading data ...')
data = import_data('adata.csv')

m = len(data)
crossval = round(3 * m / 4)  # 75% train / 25% test split
train_data = data[0:crossval, :]
test_data = data[crossval:, :]

parameters = len(train_data[0, :]) - 2  # Number of feature columns

# Normalizing the feature columns
for x in range(1, parameters + 1):
    train_data[:, x] = normalize(train_data[:, x])
    test_data[:, x] = normalize(test_data[:, x])

X_train = train_data[:, 1:parameters + 1]
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))  # (samples, timesteps, features)

y_train = np.array(pd.get_dummies(train_data[:, parameters + 1]))  # One-hot encoding the labels

X_test = test_data[:, 1:parameters + 1]
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

y_test = np.array(pd.get_dummies(test_data[:, parameters + 1]))

np.random.seed(7)

AccuracyArr = np.empty(0)
sArr = np.empty(0)
for s in (6, 12, 18):  # Varying the number of LSTM units
    model = Sequential()
    model.add(LSTM(s, input_shape=(None, parameters), return_sequences=False))
    # model.add(Dropout(0.9))
    # model.add(LSTM(18, return_sequences=True))
    # model.add(Dropout(0.2))
    # model.add(LSTM(12))
    model.add(Dense(6, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    print(model.summary())
    model.fit(X_train, y_train, epochs=2, batch_size=40)

    # Final evaluation of the model
    scores = model.evaluate(X_test, y_test, verbose=0)
    AccuracyArr = np.append(AccuracyArr, scores[1] * 100)
    sArr = np.append(sArr, s)
    print("Accuracy: %.2f%%" % (scores[1] * 100))

fig = plt.figure()
plt.plot(sArr, AccuracyArr)
plt.xlabel('#LSTM neurons')
plt.ylabel('Accuracy of model (in %)')
fig.savefig('Accelerometer.png')  # Save before show(), which clears the figure
plt.show()

model.h5 (94.3 KB)
Binary file not shown.
