4
4
# Rscript filter_genes.R -f <input file> -o <output file>
5
5
6
6
# load packages that are provided in the conda env
7
- options(show.error.messages = FALSE ,
8
- error = function () {
9
- cat(geterrmessage(), file = stderr())
10
- q(" no" , 1 , FALSE )
11
- }
7
+ options(
8
+ show.error.messages = FALSE ,
9
+ error = function () {
10
+ cat(geterrmessage(), file = stderr())
11
+ q(" no" , 1 , FALSE )
12
+ }
12
13
)
13
14
loc <- Sys.setlocale(" LC_MESSAGES" , " en_US.UTF-8" )
14
15
library(optparse )
15
16
16
17
# Arguments
17
18
option_list <- list (
18
- make_option(
19
- c(" -f" , " --input" ),
20
- default = NA ,
21
- type = " character" ,
22
- help = " Input file that contains count values to filter"
23
- ),
24
- make_option(
25
- c(" -s" , " --sep" ),
26
- default = " \t " ,
27
- type = " character" ,
28
- help = " File separator [default : '%default' ]"
29
- ),
30
- make_option(
31
- c(" -c" , " --colnames" ),
32
- default = TRUE ,
33
- type = " logical" ,
34
- help = " first line is a header [default : '%default' ]"
35
- ),
36
- make_option(
37
- " --percentile_detection" ,
38
- default = 0 ,
39
- type = " numeric" ,
40
- help = " Include genes with detected expression in at least \
19
+ make_option(
20
+ c(" -f" , " --input" ),
21
+ default = NA ,
22
+ type = " character" ,
23
+ help = " Input file that contains count values to filter"
24
+ ),
25
+ make_option(
26
+ c(" -s" , " --sep" ),
27
+ default = " \t " ,
28
+ type = " character" ,
29
+ help = " File separator [default : '%default' ]"
30
+ ),
31
+ make_option(
32
+ c(" -c" , " --colnames" ),
33
+ default = TRUE ,
34
+ type = " logical" ,
35
+ help = " first line is a header [default : '%default' ]"
36
+ ),
37
+ make_option(
38
+ " --percentile_detection" ,
39
+ default = 0 ,
40
+ type = " numeric" ,
41
+ help = " Include genes with detected expression in at least \
41
42
this fraction of cells [default : '%default' ]"
42
- ),
43
- make_option(
44
- " --absolute_detection" ,
45
- default = 0 ,
46
- type = " numeric" ,
47
- help = " Include genes with detected expression in at least \
43
+ ),
44
+ make_option(
45
+ " --absolute_detection" ,
46
+ default = 0 ,
47
+ type = " numeric" ,
48
+ help = " Include genes with detected expression in at least \
48
49
this number of cells [default : '%default' ]"
49
- ),
50
- make_option(
51
- c(" -o" , " --output" ),
52
- default = NA ,
53
- type = " character" ,
54
- help = " Output name [default : '%default' ]"
55
- )
50
+ ),
51
+ make_option(
52
+ c(" -o" , " --output" ),
53
+ default = NA ,
54
+ type = " character" ,
55
+ help = " Output name [default : '%default' ]"
56
+ )
56
57
)
57
58
58
59
opt <- parse_args(OptionParser(option_list = option_list ),
59
- args = commandArgs(trailingOnly = TRUE ))
60
+ args = commandArgs(trailingOnly = TRUE )
61
+ )
60
62
if (opt $ sep == " tab" ) {
61
- opt $ sep <- " \t "
63
+ opt $ sep <- " \t "
62
64
}
63
65
if (opt $ sep == " comma" ) {
64
- opt $ sep <- " ,"
66
+ opt $ sep <- " ,"
65
67
}
66
68
67
69
# Open files
68
70
data.counts <- read.delim(
69
- opt $ input ,
70
- h = opt $ colnames ,
71
- row.names = 1 ,
72
- sep = opt $ sep ,
73
- check.names = FALSE
71
+ opt $ input ,
72
+ h = opt $ colnames ,
73
+ row.names = 1 ,
74
+ sep = opt $ sep ,
75
+ check.names = FALSE
74
76
)
75
77
76
78
# note the [if else] below, to handle percentile_detection=absolute_detection=0
77
79
# Search for genes that are expressed in a certain percent of cells
78
80
if (opt $ percentile_detection > 0 ) {
79
- kept_genes <- rowSums(data.counts != 0 ) > = (opt $ percentile_detection * ncol(data.counts ))
81
+ kept_genes <- rowSums(data.counts != 0 ) > = (opt $ percentile_detection * ncol(data.counts ))
80
82
} else {
81
-
82
- # Search for genes that are expressed in more than an absolute number of cells
83
- kept_genes <- rowSums(data.counts != 0 ) > = (opt $ absolute_detection )
83
+ # Search for genes that are expressed in more than an absolute number of cells
84
+ kept_genes <- rowSums(data.counts != 0 ) > = (opt $ absolute_detection )
84
85
}
85
86
86
87
# Filter matrix
@@ -89,10 +90,10 @@ data.counts <- cbind(Genes = rownames(data.counts), data.counts)
89
90
90
91
# Save filtered matrix
91
92
write.table(
92
- data.counts ,
93
- opt $ output ,
94
- sep = " \t " ,
95
- quote = FALSE ,
96
- col.names = TRUE ,
97
- row.names = FALSE
93
+ data.counts ,
94
+ opt $ output ,
95
+ sep = " \t " ,
96
+ quote = FALSE ,
97
+ col.names = TRUE ,
98
+ row.names = FALSE
98
99
)
0 commit comments