Commit 0d06c54: "Add files via upload"
1 parent e0dd8d2
File tree: 7 files changed, +449 -2 lines

README.md (+37 -2)
@@ -1,2 +1,37 @@
-# HumanActivity-Recogniser
-Classifying the physical activities performed by a user based on accelerometer and gyroscope sensor data collected by a smartphone in the user’s pocket. The activities to be classified are: Standing, Sitting, StairsUp, StairsDown, Walking and Cycling.
# Human-Activity-Recognition

## Dataset link
https://archive.ics.uci.edu/ml/datasets/Heterogeneity+Activity+Recognition

## Libraries used
Keras, Scikit-Learn, NumPy, Matplotlib and Pandas

## File structure
There are 9 main files: 4 for data management, 4 for machine-learning code and 1 for plotting the results.
### Data management files
1. As the dataset is very large (~1.4 GB), it was partitioned into 13 files, and the scripts 'compress_file.py' and 'compress2.0.py' were used to downsample the data in those 13 files into 13 compressed files.
2. The scripts 'merge.py' and 'merge2.0.py' merge the compressed files to obtain the dataset used for training. The 2.0 scripts were used for merging the accelerometer and gyroscope data. (A minimal sketch of this pipeline is shown after this list.)
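A minimal sketch of what this downsample-and-merge stage might look like, assuming the scripts simply decimate rows with pandas (mirroring the `data.iloc[::10, :]` step in main.py). The directory names, file pattern and decimation factor here are assumptions, since the compression scripts themselves are not part of this commit:

```python
import glob
import pandas as pd

# Hypothetical downsampling step: keep every 10th row of each partition,
# mirroring the decimation used in main.py.
for i, path in enumerate(sorted(glob.glob('Partitioned_data/part_*.csv'))):
    part = pd.read_csv(path)
    part.iloc[::10, :].to_csv(f'Compressed_data/compressed_{i}.csv', index=False)

# Hypothetical merge step: concatenate the compressed partitions row-wise
# into the single training file loaded by main_NN.py / main_RNN.py.
frames = [pd.read_csv(p) for p in sorted(glob.glob('Compressed_data/compressed_*.csv'))]
pd.concat(frames, ignore_index=True).to_csv('Compressed_data/adata.csv', index=False)
```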
### Machine-learning code
1. "main_NN.py" contains the neural-network implementation, which was applied to the accelerometer and gyroscope data separately.
2. "main_RNN.py" contains the LSTM implementation, which was used on the merged data as well as on the accelerometer and gyroscope data separately.
3. "main.py" takes in the complete dataset (not the compressed dataset) and implements the LSTM.
4. "trainingPreprocessedData.py" takes in the dataset at https://archive.ics.uci.edu/ml/datasets/Smartphone-Based+Recognition+of+Human+Activities+and+Postural+Transitions and outputs the result. This file was mainly created to check whether our LSTM model was good enough; the accuracy obtained on this preprocessed dataset was 91%.
### Plotting
Used for plotting the results obtained from "main_NN.py".
## Model
"model.h5" stores the final model for the problem; a hedged example of loading it is shown below.
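A minimal sketch of loading the saved model for inference, assuming a Keras version compatible with the one used for training. The (samples, timesteps=1, features) input shape follows the reshaping done in main.py, and the zero-filled sample is only a placeholder:

```python
import numpy as np
from keras.models import load_model

model = load_model('model.h5')  # the trained LSTM stored in this commit

# Placeholder input shaped (samples, timesteps, features), as in main.py.
sample = np.zeros((1, 1, model.input_shape[-1]))
probs = model.predict(sample)   # per-class scores for the 6 activities
print(probs.argmax(axis=-1))    # index of the predicted activity
```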
## NOTE
The final code is run by 'main.py'; the dataset must be in the same folder as the script, which should be run with python3. A quick pre-run check is sketched below.
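A hedged sketch of verifying the setup before running main.py; the two file names match the pd.read_csv calls in main.py:

```python
import os

# main.py reads these two CSVs from its own folder.
required = ['Phones_accelerometer.csv', 'Phones_gyroscope.csv']
missing = [f for f in required if not os.path.exists(f)]
if missing:
    raise SystemExit("Missing %s; place them here, then run: python3 main.py" % missing)
```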

main.py (+102)
@@ -0,0 +1,102 @@
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM

start_time = time.time()  # To keep track of the time taken to run the code

print('Loading data ...')
data1 = pd.read_csv('Phones_accelerometer.csv')  # Loading accelerometer data
data2 = pd.read_csv('Phones_gyroscope.csv')      # Loading gyroscope data

length = min(len(data1), len(data2))  # Truncate both to the shorter length so they can be merged row-wise

data1 = data1.drop(labels=['Arrival_Time', 'Creation_Time', 'Index', 'User'], axis=1)  # Dropping unnecessary fields
data2 = data2.drop(labels=['Arrival_Time', 'Creation_Time', 'Index', 'User', 'Model', 'Device'], axis=1)

data1 = data1.head(length)  # Taking only the top 'length' entries from both frames
data2 = data2.head(length)

data2.columns = ['x1', 'y1', 'z1', 'gt1']  # Renaming data2's columns, since data1 already uses 'x', 'y' and 'z'
data = pd.concat([data1, data2], axis=1)   # Merging the accelerometer and gyroscope data

to_drop = ['null']  # Dropping rows labelled 'null' in either data1 or data2
data = data[~data['gt'].isin(to_drop)]
data = data[~data['gt1'].isin(to_drop)]

data = data.drop(labels=['gt1'], axis=1)  # Keep a single label column

data = data.iloc[::10, :]  # Downsample: keep every 10th row

cols_to_norm = ['x', 'y', 'z', 'x1', 'y1', 'z1']  # Mean-normalizing the sensor columns
data[cols_to_norm] = data[cols_to_norm].apply(lambda x: (x - x.mean()) / (x.max() - x.min()))

y = data[['gt']]  # Extracting only the activity labels
data = data.drop(labels=['gt'], axis=1)
data = pd.get_dummies(data)      # One-hot encoding the remaining categorical columns
parameters1 = len(data.columns)  # Number of input features

y = pd.get_dummies(y)            # One-hot encoding the labels
parameters2 = len(y.columns)     # Number of activity classes (6)

data = np.array(data)
y = np.array(y)

m = len(data)
crossval = round(3 * m / 4)  # 75% of the data for training, the remaining 25% for testing
train_data = data[0:crossval, :]
train_data_y = y[0:crossval, :]

test_data = data[crossval:, :]
test_data_y = y[crossval:, :]

# Reshaping into the (samples, timesteps, features) form required by the LSTM
X_train = np.reshape(train_data, (train_data.shape[0], 1, train_data.shape[1]))
y_train = train_data_y

X_test = np.reshape(test_data, (test_data.shape[0], 1, test_data.shape[1]))
y_test = test_data_y

np.random.seed(7)

# Building the LSTM model
model = Sequential()
model.add(LSTM(24, input_shape=(1, parameters1), return_sequences=True))
model.add(LSTM(12))
model.add(Dense(parameters2, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

print(model.summary())

Accuracy_Arr = np.empty(0)
batch_size_Arr = np.empty(0)

# Fitting the data with batch sizes 2**5, 2**6 and 2**7
max_batch_power = 8
for i in range(5, max_batch_power):
    model.fit(X_train, y_train, epochs=3, batch_size=pow(2, i))
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Accuracy: %.2f%%" % (scores[1] * 100))
    Accuracy_Arr = np.append(Accuracy_Arr, scores[1] * 100)
    batch_size_Arr = np.append(batch_size_Arr, pow(2, i))

fig = plt.figure()  # Plot of accuracy vs batch size
plt.plot(batch_size_Arr, Accuracy_Arr)
plt.xlabel("Batch size")
plt.ylabel("Accuracy of model (in %)")
fig.savefig("Merged_data_Accuracy_vs_batch_size.png")  # Save before show(), which clears the figure
plt.show()

model.save("my_model.h5")  # Saving the model

main_NN.py (+119)
@@ -0,0 +1,119 @@
import os
import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, f1_score


#################################### Import Data ##########################################
def import_data(file):
    file_dir = os.path.dirname(__file__)
    file_path = os.path.join(file_dir, 'Compressed_data/' + file)  # Make sure the dataset is in this folder
    train_data = np.loadtxt(file_path, dtype=float, delimiter=',', skiprows=1)
    return train_data


############################## Neural Network Implementation ##############################
def NeuralNetworkTrain(X_train, y_train, X_test, y_test):
    train_scores = np.empty(0)
    test_scores = np.empty(0)
    indices = np.empty(0)
    for i in (5, 10, 15):  # Three hidden layers of size i each; try different depths as well
        print("hidden layer size: ", i, "\n")
        mlp = MLPClassifier(hidden_layer_sizes=(i, i, i), early_stopping=True,
                            learning_rate='adaptive', learning_rate_init=0.003)
        mlp.fit(X_train, y_train)

        predictions_train = mlp.predict(X_train)
        print("Fit on train data for size ", i, ":\n", classification_report(y_train, predictions_train))

        predictions_test = mlp.predict(X_test)
        print("Fit on test data for size ", i, ":\n", classification_report(y_test, predictions_test))

        train_scores = np.append(train_scores, f1_score(y_train, predictions_train, average='macro'))
        test_scores = np.append(test_scores, f1_score(y_test, predictions_test, average='macro'))
        indices = np.append(indices, i)

    plt.plot(indices, train_scores)
    plt.plot(indices, test_scores)
    plt.legend(['Train scores', 'Test scores'], loc='upper left')
    plt.show()


############################## Random Forest Implementation ###############################
def RandomForestTrain(X_train, y_train, X_test, y_test):
    rf = RandomForestClassifier(n_estimators=100)
    rf.fit(X_train, y_train)

    predictions_train = rf.predict(X_train)
    print("Fit on train data:\n", classification_report(y_train, predictions_train))

    predictions_test = rf.predict(X_test)
    print("Fit on test data:\n", classification_report(y_test, predictions_test))


start_time = time.time()  # Tracking the execution time

data = import_data('gdata.csv')  # Importing the gyroscope data
cross_val = 1060904  # Row after which the data of the last user is recorded (used for cross-validation)
m = 1060904          # Training on the data of two users only; try all 8 users as well (m = 993720 for adata, m = 1060904 for gdata)
train_data = data[0:m, :]
test_data = data[cross_val:, :]

# Setting up the feature matrix and output vector
parameters = len(train_data[0, :]) - 2  # Number of feature columns (first column is an index, last is the label)

X_train = train_data[:, 1:parameters + 1]
y_train = train_data[:, parameters + 1]

X_test = test_data[:, 1:parameters + 1]
y_test = test_data[:, parameters + 1]

NeuralNetworkTrain(X_train, y_train, X_test, y_test)

data = import_data('adata.csv')  # Importing the accelerometer data
cross_val = 993720
m = 993720
train_data = data[0:m, :]
test_data = data[cross_val:, :]

parameters = len(train_data[0, :]) - 2

X_train = train_data[:, 1:parameters + 1]
y_train = train_data[:, parameters + 1]

X_test = test_data[:, 1:parameters + 1]
y_test = test_data[:, parameters + 1]

NeuralNetworkTrain(X_train, y_train, X_test, y_test)
print("time elapsed: ", format(time.time() - start_time))  # ~6-7 minutes on the entire dataset

# RandomForestTrain(X_train, y_train, X_test, y_test)  # Much slower; can take around half an hour
# print("time elapsed: ", format(time.time() - start_time))

main_RNN.py (+88)
@@ -0,0 +1,88 @@
import os
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout


#################################### Import Data ##########################################
def import_data(file):
    file_dir = os.path.dirname(__file__)
    file_path = os.path.join(file_dir, 'Compressed_data/' + file)  # Make sure the dataset is in this folder
    train_data = np.loadtxt(file_path, dtype=float, delimiter=',', skiprows=1)
    return train_data


def normalize(data_vector):
    # Rescale a column to the range [-1, 1]
    max_data = np.amax(data_vector)
    min_data = np.amin(data_vector)
    return -1 + 2 * (data_vector - min_data) / (max_data - min_data)


start_time = time.time()  # Tracking the execution time

print('Loading data ...')
data = import_data('adata.csv')

m = len(data)
crossval = round(3 * m / 4)  # 75% train / 25% test split
train_data = data[0:crossval, :]
test_data = data[crossval:, :]

parameters = len(train_data[0, :]) - 2  # Number of feature columns

# Normalizing the feature columns
for x in range(1, parameters + 1):
    train_data[:, x] = normalize(train_data[:, x])
    test_data[:, x] = normalize(test_data[:, x])

X_train = train_data[:, 1:parameters + 1]
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))  # (samples, timesteps, features)

y_train = np.array(pd.get_dummies(train_data[:, parameters + 1]))  # One-hot encoding the labels

X_test = test_data[:, 1:parameters + 1]
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

y_test = np.array(pd.get_dummies(test_data[:, parameters + 1]))

np.random.seed(7)

AccuracyArr = np.empty(0)
sArr = np.empty(0)
for s in (6, 12, 18):  # Varying the number of LSTM units
    model = Sequential()
    model.add(LSTM(s, input_shape=(None, parameters), return_sequences=False))
    # model.add(Dropout(0.9))
    # model.add(LSTM(18, return_sequences=True))
    # model.add(Dropout(0.2))
    # model.add(LSTM(12))
    model.add(Dense(6, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    print(model.summary())
    model.fit(X_train, y_train, epochs=2, batch_size=40)

    # Final evaluation of the model
    scores = model.evaluate(X_test, y_test, verbose=0)
    AccuracyArr = np.append(AccuracyArr, scores[1] * 100)
    sArr = np.append(sArr, s)
    print("Accuracy: %.2f%%" % (scores[1] * 100))

fig = plt.figure()
plt.plot(sArr, AccuracyArr)
plt.xlabel('#LSTM neurons')
plt.ylabel('Accuracy of model (in %)')
fig.savefig('Accelerometer.png')  # Save before show(), which clears the figure
plt.show()

model.h5 (94.3 KB)
Binary file not shown.
