Skip to content

Commit 023776e

Browse files
authored
Merge pull request #706 from ARTbio/gsc_filter_genes
Update Gsc filter genes tool
2 parents 44d7e10 + 0c4a3bb commit 023776e

File tree

3 files changed

+65
-60
lines changed

3 files changed

+65
-60
lines changed

tools/gsc_filter_genes/.shed.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ description: Filter genes that are detected in less than a fraction of libraries
55
long_description:
66
categories:
77
- Transcriptomics
8+
- Single Cell
89
homepage_url: http://artbio.fr
910
remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_filter_genes
1011
toolshed:

tools/gsc_filter_genes/filter_genes.R

Lines changed: 60 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,83 +4,84 @@
44
# Rscript filter_genes.R -f <input file> -o <output file>
55

66
# load packages that are provided in the conda env
7-
options(show.error.messages = FALSE,
8-
error = function() {
9-
cat(geterrmessage(), file = stderr())
10-
q("no", 1, FALSE)
11-
}
7+
options(
8+
show.error.messages = FALSE,
9+
error = function() {
10+
cat(geterrmessage(), file = stderr())
11+
q("no", 1, FALSE)
12+
}
1213
)
1314
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
1415
library(optparse)
1516

1617
# Arguments
1718
option_list <- list(
18-
make_option(
19-
c("-f", "--input"),
20-
default = NA,
21-
type = "character",
22-
help = "Input file that contains count values to filter"
23-
),
24-
make_option(
25-
c("-s", "--sep"),
26-
default = "\t",
27-
type = "character",
28-
help = "File separator [default : '%default' ]"
29-
),
30-
make_option(
31-
c("-c", "--colnames"),
32-
default = TRUE,
33-
type = "logical",
34-
help = "first line is a header [default : '%default' ]"
35-
),
36-
make_option(
37-
"--percentile_detection",
38-
default = 0,
39-
type = "numeric",
40-
help = "Include genes with detected expression in at least \
19+
make_option(
20+
c("-f", "--input"),
21+
default = NA,
22+
type = "character",
23+
help = "Input file that contains count values to filter"
24+
),
25+
make_option(
26+
c("-s", "--sep"),
27+
default = "\t",
28+
type = "character",
29+
help = "File separator [default : '%default' ]"
30+
),
31+
make_option(
32+
c("-c", "--colnames"),
33+
default = TRUE,
34+
type = "logical",
35+
help = "first line is a header [default : '%default' ]"
36+
),
37+
make_option(
38+
"--percentile_detection",
39+
default = 0,
40+
type = "numeric",
41+
help = "Include genes with detected expression in at least \
4142
this fraction of cells [default : '%default' ]"
42-
),
43-
make_option(
44-
"--absolute_detection",
45-
default = 0,
46-
type = "numeric",
47-
help = "Include genes with detected expression in at least \
43+
),
44+
make_option(
45+
"--absolute_detection",
46+
default = 0,
47+
type = "numeric",
48+
help = "Include genes with detected expression in at least \
4849
this number of cells [default : '%default' ]"
49-
),
50-
make_option(
51-
c("-o", "--output"),
52-
default = NA,
53-
type = "character",
54-
help = "Output name [default : '%default' ]"
55-
)
50+
),
51+
make_option(
52+
c("-o", "--output"),
53+
default = NA,
54+
type = "character",
55+
help = "Output name [default : '%default' ]"
56+
)
5657
)
5758

5859
opt <- parse_args(OptionParser(option_list = option_list),
59-
args = commandArgs(trailingOnly = TRUE))
60+
args = commandArgs(trailingOnly = TRUE)
61+
)
6062
if (opt$sep == "tab") {
61-
opt$sep <- "\t"
63+
opt$sep <- "\t"
6264
}
6365
if (opt$sep == "comma") {
64-
opt$sep <- ","
66+
opt$sep <- ","
6567
}
6668

6769
# Open files
6870
data.counts <- read.delim(
69-
opt$input,
70-
h = opt$colnames,
71-
row.names = 1,
72-
sep = opt$sep,
73-
check.names = FALSE
71+
opt$input,
72+
h = opt$colnames,
73+
row.names = 1,
74+
sep = opt$sep,
75+
check.names = FALSE
7476
)
7577

7678
# note the [if else] below, to handle percentile_detection=absolute_detection=0
7779
# Search for genes that are expressed in at least a given fraction of cells
7880
if (opt$percentile_detection > 0) {
79-
kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts))
81+
kept_genes <- rowSums(data.counts != 0) >= (opt$percentile_detection * ncol(data.counts))
8082
} else {
81-
82-
# Search for genes that are expressed in more than an absolute number of cells
83-
kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection)
83+
# Search for genes that are expressed in at least an absolute number of cells
84+
kept_genes <- rowSums(data.counts != 0) >= (opt$absolute_detection)
8485
}
8586

8687
# Filter matrix
@@ -89,10 +90,10 @@ data.counts <- cbind(Genes = rownames(data.counts), data.counts)
8990

9091
# Save filtered matrix
9192
write.table(
92-
data.counts,
93-
opt$output,
94-
sep = "\t",
95-
quote = FALSE,
96-
col.names = TRUE,
97-
row.names = FALSE
93+
data.counts,
94+
opt$output,
95+
sep = "\t",
96+
quote = FALSE,
97+
col.names = TRUE,
98+
row.names = FALSE
9899
)

tools/gsc_filter_genes/filter_genes.xml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
<tool id="filter_genes" name="Filter genes in single cell data" version="4.3.1+galaxy0" profile="21.01">
1+
<tool id="filter_genes" name="Filter genes in single cell data" version="4.3.1+galaxy1" profile="21.01">
22
<description>which are detected in less than a given fraction of the libraries</description>
3+
<xrefs>
4+
<xref type="bio.tools">galaxy_single_cell_suite</xref>
5+
</xrefs>
36
<requirements>
47
<requirement type="package" version="1.7.3">r-optparse</requirement>
58
</requirements>

0 commit comments

Comments
 (0)