-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcscAssign1.R
122 lines (85 loc) · 4.28 KB
/
cscAssign1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# ---- Load inputs -----------------------------------------------------------
# Reads the per-problem and per-step CSV files and keeps only the columns
# needed for the per-student statistics computed further down.

# Directory expected to hold the two input CSV files.
working_dir <- "\\afs\\unity.ncsu.edu\\users\\e\\efarhan\\csc-591"
file1 <- "student_prb_clean.csv"
file2 <- "student_step_clean.csv"

# Full path of each input: directory + separator + file name.
input_stdProb <- paste0(working_dir, "\\", file1)
input_stdStep <- paste0(working_dir, "\\", file2)

# Previously the full paths above were built but never used. Read from the
# full path when it exists; otherwise fall back to the bare file name, which
# is resolved against the current working directory (the earlier behaviour).
path_std_prb <- if (file.exists(input_stdProb)) input_stdProb else file1
path_std_step <- if (file.exists(input_stdStep)) input_stdStep else file2

# check.names = FALSE keeps headers that contain spaces or parentheses
# verbatim; the column selections below rely on those exact names.
std_prb <- read.table(path_std_prb, sep = ",", header = TRUE,
                      check.names = FALSE)
std_step <- read.table(path_std_step, sep = ",", header = TRUE,
                       check.names = FALSE)

# Keep "Anon Student Id" and "Correct First Attempts" from the problem file.
col_std_prb <- c("Anon Student Id", "Correct First Attempts")
subset_std_prob <- std_prb[, col_std_prb]

# Keep "Anon Student Id" and "Step Duration (sec)" from the step file.
col_std_step <- c("Anon Student Id", "Step Duration (sec)")
subset_std_step <- std_step[, col_std_step]
# ---- Per-student totals ----------------------------------------------------
# Builds b22 with one row per distinct student found in subset_std_step:
#   name_of_std          student id (character)
#   total_corr_attempts  sum of column 2 of subset_std_prob for that student
#   total_step_duration  sum of column 2 of subset_std_step for that student
# A student with no matching rows in subset_std_prob gets a total of 0.

# Student ids as character so both tables are matched on the same type.
prob_student_ids <- as.character(subset_std_prob[, 1])
step_student_ids <- as.character(subset_std_step[, 1])

# Distinct students, taken from the step table.
std_names <- unique(step_student_ids)

# Built in one pass instead of `for (i in 1:length(std_names))`: that loop
# iterated over c(1, 0) when there were no students (writing a bogus NA row)
# and grew the data frame one row at a time.
# which() drops NA comparisons, so rows with a missing id are never summed.
b22 <- data.frame(
  name_of_std = std_names,
  total_corr_attempts = vapply(
    std_names,
    function(student) {
      sum(subset_std_prob[which(prob_student_ids == student), 2])
    },
    numeric(1),
    USE.NAMES = FALSE
  ),
  total_step_duration = vapply(
    std_names,
    function(student) {
      sum(subset_std_step[which(step_student_ids == student), 2])
    },
    numeric(1),
    USE.NAMES = FALSE
  ),
  stringsAsFactors = FALSE
)
# ---- Outlier detection -----------------------------------------------------
# Collects into to_remove_again the row indices of b22 whose value in any
# measure column is reported as an outlier by boxplot.stats().

# Every column except the first, which holds the student names.
col_b22 <- names(b22)[-1]

# For each measure column, look up the rows matching each reported outlier
# value and concatenate the indices column by column. The result is NULL when
# no column has outliers.
to_remove_again <- unlist(
  lapply(col_b22, function(measure) {
    flagged_values <- boxplot.stats(b22[, measure])$out
    unlist(
      lapply(flagged_values, function(value) {
        which(b22[, measure] == value)
      }),
      use.names = FALSE
    )
  }),
  use.names = FALSE
)
# ---- Summary statistics ----------------------------------------------------
# Shows the per-student table, derives b22_clean (b22 without the outlier
# rows listed in to_remove_again) and prints mean / median / max / min of both
# measures, with and (when any outliers were found) without outliers.

# View() opens a data viewer; only attempt it in an interactive session so a
# batch run does not stop here.
if (interactive()) {
  View(b22)
}

# Drop outlier rows only when there are any: b22[-NULL, ] would select
# zero rows rather than all of them.
b22_clean <- b22
if (length(to_remove_again) > 0) {
  b22_clean <- b22[-to_remove_again, ]
}

# Statistics including outliers. The header now ends with a newline so it no
# longer runs into the first statistics row.
cat("First Corr. Attempts Step Duration (sec)\n")
cat(" mean with outliers", mean(b22[, 2]), " ", mean(b22[, 3]), "\n")
cat(" median with outliers", median(b22[, 2]), " ", median(b22[, 3]), "\n")
cat(" max with outliers", max(b22[, 2]), " ", max(b22[, 3]), "\n")
cat(" min with outliers", min(b22[, 2]), " ", min(b22[, 3]), "\n")

# Statistics excluding outliers. Max and min used to be printed together on
# one line mislabelled "median w/o outliers"; each now has its own labelled
# row, mirroring the "with outliers" section.
if (length(to_remove_again) > 0) {
  cat(" mean w/o outliers", mean(b22_clean[, 2]), " ",
      mean(b22_clean[, 3]), "\n")
  cat(" median w/o outliers", median(b22_clean[, 2]), " ",
      median(b22_clean[, 3]), "\n")
  cat(" max w/o outliers", max(b22_clean[, 2]), " ",
      max(b22_clean[, 3]), "\n")
  cat(" min w/o outliers", min(b22_clean[, 2]), " ",
      min(b22_clean[, 3]), "\n")
}
# ---- Distribution plots ----------------------------------------------------
# For each measure in b22_clean: a normal Q-Q plot of the standardized values
# with a 45-degree reference line, followed by a histogram of the raw values.

# Draw the Q-Q plot and histogram for one measure.
#   values      numeric vector to plot
#   line_col    colour of the 45-degree reference line
#   hist_title  histogram title
#   hist_xlab   histogram x-axis label (hist() would otherwise label the axis
#               with the expression passed as its first argument)
plot_measure_distribution <- function(values, line_col, hist_title,
                                      hist_xlab) {
  if (length(values) == 0) {
    warning("No values to plot for: ", hist_xlab, call. = FALSE)
    return(invisible(NULL))
  }

  # Standardizing needs a positive, finite standard deviation. With fewer
  # than two values (sd is NA) or constant values (sd is 0) the z-scores are
  # all NA/NaN and qqnorm() would stop with an error, so skip the Q-Q plot.
  spread <- sd(values)
  if (is.finite(spread) && spread > 0) {
    z_scores <- (values - mean(values)) / spread
    qqnorm(z_scores)
    abline(0, 1, col = line_col)  # 45-degree reference line
  } else {
    warning("Skipping Q-Q plot (no usable spread) for: ", hist_xlab,
            call. = FALSE)
  }

  hist(values, main = hist_title, xlab = hist_xlab)
  invisible(NULL)
}

# Plots for total correct first attempts.
attemps <- b22_clean[, 2]
plot_measure_distribution(
  attemps,
  line_col = "blue",
  hist_title = "Histogram of Correct First Attempts",
  hist_xlab = "Correct First Attempts"
)

# Plots for total step duration (sec).
durations <- b22_clean[, 3]
plot_measure_distribution(
  durations,
  line_col = "red",
  hist_title = "Histogram of Total Step Duration (sec)",
  hist_xlab = "Total Step Duration (sec)"
)