-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
117 lines (76 loc) · 4.27 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
## This R script is for the Getting & Cleaning Data course project.
## The goal of the project is to collect, work with, and clean
## a data set.
## This script uses the plyr library for the split-apply-combine pattern
library(plyr)
## Folder that already holds an extracted copy of the dataset. Change this
## path to suit your machine. When the folder does not exist the script stays
## in the current working directory and fetches the data below instead of
## aborting on setwd().
dataset_home <- "C:/Program Files/RStudio/getdata_projectfiles_UCI HAR Dataset/UCI HAR Dataset"
if (file.exists(dataset_home)) {
  setwd(dataset_home)
}
## Local file name used for the original zip archive of the dataset
zipfilename <- "getdata_dataset.zip"
## Download and unzip the dataset:
if (!file.exists(zipfilename)) {
  ## No trailing whitespace in the URL: it is sent to the server as written.
  fileURL <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
  ## mode = "wb" transfers the archive as binary so it is not corrupted on
  ## Windows.
  download.file(fileURL, zipfilename, mode = "wb")
}
if (!file.exists("UCI HAR Dataset")) {
  unzip(zipfilename)
}
## The rest of the script reads "features.txt", "train/..." and "test/..."
## relative to the working directory. If the current directory does not hold
## those files, move into the extracted "UCI HAR Dataset" folder.
if (!file.exists("features.txt") && file.exists("UCI HAR Dataset")) {
  setwd("UCI HAR Dataset")
}
# Step 1
# Merge the training and the test sets to create one data set
#########################################################################
# Each kind of file is read for both partitions and then stacked with the
# training rows first and the test rows after them.

# Subject files -> 'subject' data set
subject_read_training <- read.table(file = "train/subject_train.txt")
subject_read_test <- read.table(file = "test/subject_test.txt")
subject_read_data <- rbind(
  subject_read_training,
  subject_read_test
)

# y files -> 'y' data set
y_read_training <- read.table(file = "train/y_train.txt")
y_read_test <- read.table(file = "test/y_test.txt")
y_read_data <- rbind(
  y_read_training,
  y_read_test
)

# X files -> 'x' data set
x_read_training <- read.table(file = "train/X_train.txt")
x_read_test <- read.table(file = "test/X_test.txt")
x_read_data <- rbind(
  x_read_training,
  x_read_test
)
# Step 2
# Keep only the mean and standard deviation of each measurement
######################################################################################
# The second column of features.txt supplies the names that are searched
# below and later used as column names.
read_features <- read.table(file = "features.txt")
# Positions of the features whose name contains "-mean()" or "-std()"
mean_std_features <- grep(
  pattern = "-(mean|std)\\(\\)",
  x = read_features[[2]]
)
# Restrict the 'x' data set to those columns ...
x_read_data <- x_read_data[, mean_std_features]
# ... and name each retained column after its feature
names(x_read_data) <- read_features[[2]][mean_std_features]
# Step 3
# Use the descriptive activity names to name the activities in the dataset
##########################################################################
# activity_labels.txt: column 1 is matched against the activity codes below,
# column 2 supplies the replacement label.
read_activity_names <- read.table("activity_labels.txt")
# Replace each activity code with its label. The code is looked up by value
# in column 1 (via match) instead of being used as a row position, so the
# result stays correct even if the label file is not ordered 1..n. Codes
# with no matching id become NA.
y_read_data[, 1] <- read_activity_names[
  match(y_read_data[, 1], read_activity_names[, 1]),
  2
]
# Name the single column "activity"
names(y_read_data) <- "activity"
# Step 4
# Appropriately label the data set with descriptive variable names
##################################################################
# The subject data set gets the column name "subject"
names(subject_read_data) <- "subject"
# Put the three data sets side by side in one data set: the 'x' columns
# first, then the 'y' data set, then the 'subject' data set
bind_all_data <- cbind(
  x_read_data,
  y_read_data,
  subject_read_data
)
# Step 5
# From the data set above create a second, independent tidy data set with the
# average of each variable for each activity and each subject
############################################################################
# ddply splits bind_all_data by (subject, activity) and applies the function
# to every piece. All columns except the two grouping keys are averaged, so
# the result does not depend on a fixed count of measurement columns.
new_average_data_set <- ddply(bind_all_data, .(subject, activity),
  function(x) {
    is_key <- names(x) %in% c("subject", "activity")
    colMeans(x[, !is_key, drop = FALSE])
  })
## Write the new cleaned and merged data to a text file, without row names
write.table(new_average_data_set, "tidy.txt", row.names = FALSE)