-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
92 lines (67 loc) · 2.13 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Project for Getting and Cleaning Data
library("readr")
library("dplyr")
# Test set ----
# Read the feature matrix for the test split; the file has no header row.
test <- read_table("./UCI HAR Dataset/test/X_test.txt", col_names = FALSE)
str(test)

# Feature names live in the second column of features.txt (space separated).
varnames <- read.csv("./UCI HAR Dataset/features.txt", header = FALSE, sep = " ")
head(varnames)
names(test) <- varnames[[2]]

# Activity codes and subject ids are single-column files without headers.
ytest <- setNames(
  read.csv("./UCI HAR Dataset/test/y_test.txt", header = FALSE),
  "activity"
)
subj_test <- setNames(
  read.csv("./UCI HAR Dataset/test/subject_test.txt", header = FALSE),
  "subject"
)

# Keep only the first column for every repeated feature name; dplyr::select()
# needs unique column names.
test2 <- test[, !duplicated(names(test))]

# contains() matches the literal text case-sensitively, so only columns whose
# name includes exactly "mean()" / "std()" are kept.
test2a <- select(test2, contains("mean()", ignore.case = FALSE))
test2b <- select(test2, contains("std()", ignore.case = FALSE))
# Training set ----
# Read the feature matrix for the training split; the file has no header row.
train <- read_table("./UCI HAR Dataset/train/X_train.txt", col_names = FALSE)
str(train)

# Reuse the feature names already loaded into `varnames` (second column).
names(train) <- varnames[[2]]

# Activity codes and subject ids are single-column files without headers.
ytrain <- setNames(
  read.csv("./UCI HAR Dataset/train/y_train.txt", header = FALSE),
  "activity"
)
subj_train <- setNames(
  read.csv("./UCI HAR Dataset/train/subject_train.txt", header = FALSE),
  "subject"
)

# Keep only the first column for every repeated feature name; dplyr::select()
# needs unique column names.
train2 <- train[, !duplicated(names(train))]

# contains() matches the literal text case-sensitively, so only columns whose
# name includes exactly "mean()" / "std()" are kept.
train2a <- select(train2, contains("mean()", ignore.case = FALSE))
train2b <- select(train2, contains("std()", ignore.case = FALSE))
# Combine datasets ----
# Column order within each split: activity code, mean() features,
# std() features, subject id. Test rows are stacked above training rows.
test3 <- cbind(ytest, test2a, test2b, subj_test)
train3 <- cbind(ytrain, train2a, train2b, subj_train)
samsung <- rbind(test3, train3)

# Label activity ----
# activity_labels.txt is a two-column, space-separated lookup: code, label.
activity_name <- read.csv(
  "./UCI HAR Dataset/activity_labels.txt",
  header = FALSE,
  sep = " "
)
names(activity_name) <- c("activity", "label")

# Fail loudly if the data holds a code the lookup table does not know;
# otherwise factor() below would silently turn it into NA.
stopifnot(all(samsung$activity %in% activity_name$activity))

# Pair codes with labels through explicit `levels` so the mapping comes from
# the lookup table itself, not from the sort order of whichever codes happen
# to be present in the data.
samsung$activity <- factor(
  samsung$activity,
  levels = activity_name$activity,
  labels = activity_name$label
)
samsung$subject <- factor(samsung$subject)
str(samsung)
# Clean up ----
# Drop every intermediate object in one call; only `samsung` is needed from
# here on.
rm(
  test, test2, test2a, test2b,
  train, train2, train2a, train2b,
  ytest, ytrain,
  subj_test, subj_train,
  test3, train3,
  varnames, activity_name
)
# Create summary dataset ----
# Average every feature column for each subject/activity pair.
# across(everything(), mean) replaces the deprecated summarise_each(funs(mean));
# everything() skips the grouping columns, and output column names are kept.
# .groups = "drop" returns an ungrouped result so later code is not affected
# by a leftover grouping on `subject`.
samsum <- samsung %>%
  group_by(subject, activity) %>%
  summarise(across(everything(), mean), .groups = "drop")
head(samsum)

# Written with write.table() defaults (space separated, quoted strings),
# without row names.
write.table(samsum, "Samsung_Avg.txt", row.names = FALSE)