Skip to content

Commit ae36c63

Browse files
authored
Merge pull request #711 from ARTbio/gsc_signature_score
Update Gsc_signature_score
2 parents 7343be2 + 3a8b848 commit ae36c63

File tree

3 files changed

+150
-133
lines changed

3 files changed

+150
-133
lines changed

tools/gsc_signature_score/.shed.yml

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ long_description: |
66
Compute signature scores from single cell RNAseq data
77
categories:
88
- Transcriptomics
9+
- Single Cell
910
homepage_url: http://artbio.fr
1011
remote_repository_url: https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_signature_score
1112
toolshed:

tools/gsc_signature_score/signature_score.R

+145-132
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
# Compute the signature score based on the geometric mean of the target gene expression
33
# and split cells in 2 groups (high/low) using this signature score.
44

5-
options(show.error.messages = FALSE,
6-
error = function() {
7-
cat(geterrmessage(), file = stderr())
8-
q("no", 1, FALSE)
9-
}
5+
options(
6+
show.error.messages = FALSE,
7+
error = function() {
8+
cat(geterrmessage(), file = stderr())
9+
q("no", 1, FALSE)
10+
}
1011
)
1112
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
1213
warnings()
@@ -18,93 +19,95 @@ library(gridExtra)
1819

1920
# Arguments
2021
# Command-line interface of the script (parsed with optparse).
# Every output path has its own default so that running the script with
# default arguments never makes two outputs write to the same file.
option_list <- list(
    # Expression table: genes in rows, cells in columns
    make_option(
        "--input",
        default = NA,
        type = "character",
        help = "Input file that contains log2(CPM +1) values"
    ),
    # Field separator of the input table; the literal words "tab" and
    # "comma" are translated to the real characters after parsing
    make_option(
        "--sep",
        default = "\t",
        type = "character",
        help = "File separator [default : '%default' ]"
    ),
    make_option(
        "--colnames",
        default = TRUE,
        type = "logical",
        help = "Consider first line as header ? [default : '%default' ]"
    ),
    # Signature genes, given as a single comma-separated string
    make_option(
        "--genes",
        default = NA,
        type = "character",
        help = "List of genes comma separated"
    ),
    # Minimal percentage of cells in which a gene must be detected
    make_option(
        "--percentile_threshold",
        default = 20,
        type = "integer",
        help = "detection threshold to keep a gene in signature set [default : '%default' ]"
    ),
    make_option(
        "--output",
        default = "./output.tab",
        type = "character",
        help = "Output path [default : '%default' ]"
    ),
    make_option(
        "--stats",
        default = "./statistics.tab",
        type = "character",
        help = "statistics path [default : '%default' ]"
    ),
    make_option(
        "--correlations",
        default = "./correlations.tab",
        type = "character",
        help = "Correlations between signature genes [default : '%default' ]"
    ),
    # Dedicated default: this option used to share "./statistics.tab" with
    # --stats, so the covariance table was overwritten by the statistics table
    make_option(
        "--covariances",
        default = "./covariances.tab",
        type = "character",
        help = "Covariances between signature genes [default : '%default' ]"
    ),
    make_option(
        "--pdf",
        default = "~/output.pdf",
        type = "character",
        help = "pdf path [default : '%default' ]"
    )
)
8283

8384
opt <- parse_args(OptionParser(option_list = option_list),
84-
args = commandArgs(trailingOnly = TRUE))
85+
args = commandArgs(trailingOnly = TRUE)
86+
)
8587

8688
if (opt$sep == "tab") {
87-
opt$sep <- "\t"
89+
opt$sep <- "\t"
8890
}
8991
if (opt$sep == "comma") {
90-
opt$sep <- ","
92+
opt$sep <- ","
9193
}
9294

9395
# Take input data
9496
# Load the expression table: the first column supplies the row names (genes),
# and check.names = FALSE keeps the column (cell) identifiers exactly as
# written in the file.
# `header` is spelled out in full instead of relying on partial argument
# matching of `h`.
data.counts <- read.table(
    opt$input,
    header = opt$colnames,
    row.names = 1,
    sep = opt$sep,
    check.names = FALSE
)
101103

102104
# Get vector of target genes
103105
genes <- unlist(strsplit(opt$genes, ","))
104106

105107
# Abort early when none of the requested genes is a row name of the input
# table. `!any()` also covers an empty gene list, which previously slipped
# through and let the script continue with an empty signature.
if (!any(genes %in% rownames(data.counts))) {
    stop("None of these genes are in your dataset: ", opt$genes)
}
109112

110113
logical_genes <- rownames(data.counts) %in% genes
@@ -116,10 +119,11 @@ signature.counts <- subset(data.counts, logical_genes)
116119
signature.covariances <- as.data.frame(cov(t(signature.counts)))
117120
signature.covariances <- cbind(gene = rownames(signature.covariances), signature.covariances)
118121
write.table(signature.covariances,
119-
file = opt$covariances,
120-
quote = FALSE,
121-
row.names = FALSE,
122-
sep = "\t")
122+
file = opt$covariances,
123+
quote = FALSE,
124+
row.names = FALSE,
125+
sep = "\t"
126+
)
123127

124128
# compute signature.correlations
125129
signature.correlations <- as.data.frame(cor(t(signature.counts)))
@@ -128,15 +132,15 @@ write.table(signature.correlations, file = opt$correlations, quote = FALSE, row.
128132

129133
## Descriptive Statistics Function
130134
# Per-row summary of an expression table (one row per gene, one column per
# cell). Returns a data.frame with one row per input row and four columns:
#   mean                 - row mean
#   SD                   - row standard deviation
#   Variance             - row variance
#   Percentage_Detection - percentage of columns holding a non-zero value
descriptive_stats <- function(InputData) {
    n_cells <- ncol(InputData)
    row_average <- rowMeans(InputData)
    row_sd <- apply(InputData, 1, sd)
    row_variance <- apply(InputData, 1, var)
    # Share of cells in which the gene is detected (value different from 0)
    detection_rate <- apply(InputData, 1, function(gene_values) {
        (sum(gene_values != 0) / n_cells) * 100
    })
    data.frame(
        mean = row_average,
        SD = row_sd,
        Variance = row_variance,
        Percentage_Detection = detection_rate
    )
}
141145

142146
signature_stats <- descriptive_stats(signature.counts)
@@ -146,21 +150,21 @@ kept_genes <- signature_stats$Percentage_Detection >= opt$percentile_threshold
146150

147151
# Add warnings
148152
# Report the outcome of the detection filter.
# - Some genes pass and some fail: list the discarded genes with their
#   detection percentage and carry on.
# - No gene passes (or there is none to test): stop with an explicit message.
# The option is read and quoted under its full name, --percentile_threshold,
# instead of relying on partial matching of `opt$percent`.
if (length(unique(kept_genes)) > 1) {
    cat(
        "WARNINGS ! Following genes were removed from further analysis due to low gene expression :",
        paste(paste(rownames(signature.counts)[!kept_genes], round(signature_stats$Percentage_Detection[!kept_genes], 2), sep = " : "), collapse = ", "),
        "\n"
    )
} else if (!any(kept_genes)) {
    stop(
        "None of these genes are detected in ",
        opt$percentile_threshold,
        "% of your cells: ",
        paste(rownames(signature_stats), collapse = ", "),
        ". You can be less stringent thanks to --percentile_threshold parameter."
    )
}
165169

166170
# Remove genes poorly detected in the dataset
@@ -173,54 +177,63 @@ signature.counts[signature.counts == 0] <- 1
173177
score <- apply(signature.counts, 2, geometric.mean) # geometric.mean requires psych
174178

175179
# Add results in signature_output
176-
signature_output <- data.frame(cell = names(score),
177-
score = score,
178-
rate = ifelse(score > mean(score), "HIGH", "LOW"),
179-
nGenes = colSums(data.counts != 0),
180-
total_counts = colSums(data.counts))
180+
signature_output <- data.frame(
181+
cell = names(score),
182+
score = score,
183+
rate = ifelse(score > mean(score), "HIGH", "LOW"),
184+
nGenes = colSums(data.counts != 0),
185+
total_counts = colSums(data.counts)
186+
)
181187

182188
# statistics of input genes, signature genes first lines
183-
statistics.counts <- rbind(subset(data.counts, logical_genes),
184-
subset(data.counts, !logical_genes))
189+
statistics.counts <- rbind(
190+
subset(data.counts, logical_genes),
191+
subset(data.counts, !logical_genes)
192+
)
185193
statistics <- descriptive_stats(statistics.counts)
186194
statistics <- cbind(gene = rownames(statistics), statistics)
187195

188196

189197

190198
# Re-arrange score matrix for plots
191-
score <- data.frame(score = score,
192-
order = rank(score, ties.method = "first"),
193-
signature = signature_output$rate,
194-
stringsAsFactors = FALSE)
199+
score <- data.frame(
200+
score = score,
201+
order = rank(score, ties.method = "first"),
202+
signature = signature_output$rate,
203+
stringsAsFactors = FALSE
204+
)
195205

196206
pdf(file = opt$pdf)
197207
myplot <- ggplot(signature_output, aes(x = rate, y = score)) +
198-
geom_violin(aes(fill = rate), alpha = .5, trim = FALSE, show.legend = FALSE, cex = 0.5) +
199-
geom_abline(slope = 0, intercept = mean(score$score), lwd = 0.5, color = "red") +
200-
scale_fill_manual(values = c("#ff0000", "#08661e")) +
201-
geom_jitter(size = 0.2) + labs(y = "Score", x = "Rate") +
202-
annotate("text", x = 0.55, y = mean(score$score), cex = 3, vjust = 1.5,
203-
color = "black", label = mean(score$score), parse = TRUE) +
204-
labs(title = "Violin plots of Cell signature scores")
208+
geom_violin(aes(fill = rate), alpha = .5, trim = FALSE, show.legend = FALSE, cex = 0.5) +
209+
geom_abline(slope = 0, intercept = mean(score$score), lwd = 0.5, color = "red") +
210+
scale_fill_manual(values = c("#ff0000", "#08661e")) +
211+
geom_jitter(size = 0.2) +
212+
labs(y = "Score", x = "Rate") +
213+
annotate("text",
214+
x = 0.55, y = mean(score$score), cex = 3, vjust = 1.5,
215+
color = "black", label = mean(score$score), parse = TRUE
216+
) +
217+
labs(title = "Violin plots of Cell signature scores")
205218

206219
print(myplot)
207220
dev.off()
208221

209222
# Save file
210223
write.table(
211-
signature_output,
212-
opt$output,
213-
sep = "\t",
214-
quote = FALSE,
215-
col.names = TRUE,
216-
row.names = FALSE
224+
signature_output,
225+
opt$output,
226+
sep = "\t",
227+
quote = FALSE,
228+
col.names = TRUE,
229+
row.names = FALSE
217230
)
218231

219232
write.table(
220-
statistics,
221-
opt$stats,
222-
sep = "\t",
223-
quote = FALSE,
224-
col.names = TRUE,
225-
row.names = FALSE
233+
statistics,
234+
opt$stats,
235+
sep = "\t",
236+
quote = FALSE,
237+
col.names = TRUE,
238+
row.names = FALSE
226239
)

tools/gsc_signature_score/signature_score.xml

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
<tool id="signature_score" name="Compute signature scores" version="2.3.9+galaxy0">
1+
<tool id="signature_score" name="Compute signature scores" version="2.3.9+galaxy1">
22
<description>in single cell RNAseq</description>
3+
<xrefs>
4+
<xref type="bio.tools">galaxy_single_cell_suite</xref>
5+
</xrefs>
36
<requirements>
47
<requirement type="package" version="1.7.3">r-optparse</requirement>
58
<requirement type="package" version="3.4.4">r-ggplot2</requirement>

0 commit comments

Comments
 (0)