-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpreprocess_2.R
78 lines (51 loc) · 1.88 KB
/
preprocess_2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
library(zoo)
# Directory that is meant to hold the Datashop exports.
# NOTE(review): setwd() below is commented out and the full paths built from
# working_dir are never passed to read.table(), so both files are actually
# read relative to the current working directory -- confirm this is intended.
working_dir <- "\\afs\\unity.ncsu.edu\\users\\e\\efarhan\\csc-591"
#setwd(working_dir)

### .txt files downloaded from Datashop
file1 <- "student_problem.txt"
file2 <- "student_step.txt"

### Full path = directory + "\" + file name (currently unused, see note above).
input_stdProb <- paste0(working_dir, "\\", file1)
input_stdStep <- paste0(working_dir, "\\", file2)

### Read both tab-separated files.
# header = TRUE      : the first line holds the column names.
# check.names = FALSE: keep the column names exactly as written in the file.
# na.strings         : empty, single-space, "." and "NA" cells become NA.
# NOTE(review): read.table()'s default quote and comment characters are still
# active; if the exports can contain apostrophes or '#', consider passing
# quote = "" and comment.char = "" -- verify against the actual files.
std_prb <- read.table(
  file1,
  sep = "\t",
  header = TRUE,
  check.names = FALSE,
  na.strings = c("", " ", ".", "NA")
)
std_step <- read.table(
  file2,
  sep = "\t",
  header = TRUE,
  check.names = FALSE,
  na.strings = c("", " ", ".", "NA")
)
## Fill the missing values of std_prb, one column at a time:
##   * a column whose values are all NA is replaced by zeroes;
##   * a numeric column with some NAs is filled by cubic-spline
##     interpolation (zoo::na.spline);
##   * a non-numeric column (text, factor, logical) with some NAs is left
##     untouched, because spline interpolation is only meaningful for numbers
##     and would otherwise fail or overwrite the column with garbage.
n_rows_prb <- nrow(std_prb)
for (Var in names(std_prb)) {
  column_values <- std_prb[[Var]]
  n_missing <- sum(is.na(column_values))

  # Nothing to repair in a complete column.
  if (n_missing == 0) {
    next
  }

  if (n_missing == n_rows_prb) {
    # No observed value to interpolate from: fall back to zeroes.
    std_prb[[Var]] <- rep(0, n_rows_prb)
  } else if (is.numeric(column_values)) {
    # Interpolate the gaps from the observed values of the same column.
    std_prb[[Var]] <- na.spline(column_values)
  }
}
## Fill the missing values of std_step, one column at a time:
##   * a column whose values are all NA is replaced by zeroes;
##   * a numeric column with some NAs is filled by cubic-spline
##     interpolation (zoo::na.spline);
##   * a non-numeric column (text, factor, logical) with some NAs is left
##     untouched, because spline interpolation is only meaningful for numbers
##     and would otherwise fail or overwrite the column with garbage.
n_rows_step <- nrow(std_step)
for (Var in names(std_step)) {
  column_values <- std_step[[Var]]
  n_missing <- sum(is.na(column_values))

  # Nothing to repair in a complete column.
  if (n_missing == 0) {
    next
  }

  if (n_missing == n_rows_step) {
    # No observed value to interpolate from: fall back to zeroes.
    std_step[[Var]] <- rep(0, n_rows_step)
  } else if (is.numeric(column_values)) {
    # Interpolate the gaps from the observed values of the same column.
    std_step[[Var]] <- na.spline(column_values)
  }
}
### Export the two cleaned tables as CSV files.
# The output names are relative, so the files are written to the current
# working directory; write.csv() is called with its default settings.

# Problem-level table.
csvFile1_name <- "student_prb_clean.csv"
csvFile1 <- csvFile1_name
std_prb_clean <- as.data.frame(std_prb)
write.csv(std_prb_clean, file = csvFile1)

# Step-level table.
csvFile2_name <- "student_step_clean.csv"
csvFile2 <- csvFile2_name
std_step_clean <- as.data.frame(std_step)
write.csv(std_step_clean, file = csvFile2)