-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdoCrossValidation.R
98 lines (73 loc) · 2.6 KB
/
doCrossValidation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Build repeated k-fold cross-validation splits over every cell of a matrix.
#
# INPUT
#   inMat:    input matrix; its row and column names are discarded so that
#             cells are addressed purely by integer position
#   kfold:    number of folds per split (single positive whole number)
#   numSplit: number of splits (single positive whole number)
# OUTPUT
#   a list of length numSplit named "split_1", "split_2", ...; each element
#   is a list of length kfold named "fold_1", "fold_2", ...; each fold holds
#     testLabel:        values of the held-out cells (the original comments
#                       say these are used for calculating AUPR and AUC)
#     testIndex:        two-column matrix (rowIndex, colIndex) of those cells
#     testIndexRow:     the rowIndex column of testIndex
#     testIndexCol:     the colIndex column of testIndex
#     knownDrugIndex:   unique row indices that still hold a value > 0 once
#                       the held-out cells are treated as 0
#     knownTargetIndex: unique column indices, same criterion
#     foldMat:          copy of inMat with the held-out cells set to 0
# SIDE EFFECTS
#   - sources "getCvIndex.R" (relative path, so it is resolved against the
#     current working directory)
#   - writes the result to "savedFolds.RData" (also a relative path)
# DEPENDENCIES
#   reshape2 and data.table must be installed; they are called through `::`
#   so neither has to be attached by the caller.
doCrossValidation <- function(inMat, kfold = 10, numSplit = 5) {
  # Fail early with a clear message instead of a cryptic error further down
  # (e.g. from names<- when 1:0 produced two names for an empty list).
  isCount <- function(x) {
    is.numeric(x) && length(x) == 1L && is.finite(x) && x >= 1 &&
      x == floor(x)
  }
  if (!isCount(kfold)) {
    stop("'kfold' must be a single positive whole number", call. = FALSE)
  }
  if (!isCount(numSplit)) {
    stop("'numSplit' must be a single positive whole number", call. = FALSE)
  }

  # Drop dimnames: melt() must emit integer positions, not names, because
  # the indices are later used to subset the matrix positionally.
  rownames(inMat) <- NULL
  colnames(inMat) <- NULL

  # Wide -> long, stacked by column: one (i, j, v) row per matrix cell.
  triplet <- data.table::as.data.table(reshape2::melt(inMat))
  data.table::setnames(triplet, c("rowIndex", "colIndex", "value"))
  numTri <- nrow(triplet)

  # Loop-invariant pieces, extracted once. `[[` avoids data.table's
  # non-standard evaluation and returns the same column vectors.
  rowIdx <- triplet[["rowIndex"]]
  colIdx <- triplet[["colIndex"]]
  isPositive <- triplet[["value"]] > 0

  # Outer container: one entry per split.
  savedFolds <- vector(mode = "list", length = numSplit)
  names(savedFolds) <- paste0("split_", seq_len(numSplit))
  foldNames <- paste0("fold_", seq_len(kfold))

  # NOTE(review): getCvIndex is defined in this external file; it is called
  # with totNum and nfold and its result is indexed as folds[[j]], so it
  # presumably returns a list of kfold vectors of row positions in
  # 1..totNum -- confirm against getCvIndex.R.
  source("getCvIndex.R")

  for (i in seq_len(numSplit)) {
    folds <- getCvIndex(totNum = numTri, nfold = kfold)

    # Fresh per-split container so nothing can leak between splits.
    cvFolds <- vector("list", length = kfold)
    names(cvFolds) <- foldNames

    for (j in seq_len(kfold)) {
      currIndex <- folds[[j]]

      # Held-out cells of this fold.
      testData <- triplet[currIndex]
      testIndex <- as.matrix(testData[, 1:2, with = FALSE])

      # Cells that remain "known": value > 0 and not held out. which()
      # drops NA, matching how a data.table logical filter treats NA.
      known <- isPositive
      known[currIndex] <- FALSE
      knownRows <- which(known)

      # Matrix with the held-out cells blanked; testIndex is a two-column
      # matrix, so this is (row, col) matrix indexing.
      foldMat <- inMat
      foldMat[testIndex] <- 0

      cvFolds[[j]] <- list(
        testLabel = testData[["value"]],
        testIndex = testIndex,
        testIndexRow = testIndex[, "rowIndex"],
        testIndexCol = testIndex[, "colIndex"],
        knownDrugIndex = unique(rowIdx[knownRows]),
        knownTargetIndex = unique(colIdx[knownRows]),
        foldMat = foldMat
      )
    }
    savedFolds[[i]] <- cvFolds
  }

  cat("save 'savedFolds.RData' to disk! \n")
  flush.console()
  save(savedFolds, file = "savedFolds.RData")
  return(savedFolds)
}