-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathpipeline.yml
195 lines (179 loc) · 5.75 KB
/
pipeline.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# Pipeline pipeline_clustering_scanpy.py configuration file
# ==============================================
# compute resource options
# ------------------------
resources:
  # Number of threads requested for parallel jobs, in three tiers.
  # The tier descriptions below are phrased in terms of memory; presumably
  # memory is allocated per thread on the cluster, so a tier needs enough
  # threads to obtain the memory described — confirm for your scheduler.
  #
  # high: enough to load your mudata and do computationally intensive tasks
  threads_high: 2
  # medium: enough to load your mudata and do computationally light tasks
  threads_medium: 2
  # low: enough to load text files and do plotting; requires much less
  # memory than the other two
  threads_low: 2
# NOTE(review): undocumented flag; presumably limits how many jobs are
# submitted in parallel — confirm against the pipeline code. Also confirm
# whether it belongs at top level or under `resources`.
fewer_jobs: True
# Add path to conda env; leave blank if running native or if your cluster
# automatically inherits the login node environment
condaenv:
# Start
# --------------------------
# NOTE(review): presumably used as a prefix for output names — confirm.
sample_prefix: 'teaseq'
# path to the scaled object (.h5mu) that the pipeline starts from
scaled_obj: '../teaseq_temp.h5mu'
# full_obj only applies if the scaled object was filtered by hvgs.
# In that case panpipes uses the full object to calculate rank_gene_groups
# and to plot those genes, so it must contain every gene you want included
# in rank_gene_groups, plus logged_counts as a layer.
# If scaled_obj already contains all the genes, leave full_obj blank.
full_obj:
# run clustering on each individual modality
# (one True/False switch per modality):
modalities:
  rna: True
  prot: True
  atac: True
# settings for clustering on the multimodal (integrated) representation
multimodal:
  # if True, will look for WNN, or totalVI output
  run_clustering: True
  # options listed: WNN, mofa, totalVI
  # this tells the pipeline which integration output to look for
  integration_method: wnn
# batch_correction: harmony # None, harmony, scanorama, bbknn or combat
# ---------------------------------------
# parameters for find neighbours
# ---------------------------------------
# find neighbour parameters
#-----------------------------
# number of neighbors to use when calculating the graph for clustering and umap.
# Neighbour-graph settings, one sub-section per modality.
# Each modality takes the same set of keys.
neighbors:
  rna:
    # NOTE(review): presumably reuses a neighbour graph already stored in the
    # object instead of recomputing it — confirm against the pipeline code
    use_existing: True
    # dimensionality reduction to use for neighbours and umap (e.g. X_pca)
    dim_red: X_pca
    # how many components to use for clustering
    n_dim_red: 30
    # number of neighbours
    k: 30
    # metric: euclidean | cosine
    metric: euclidean
    # scanpy | hnsw (from scvelo)
    method: scanpy
  prot:
    # NOTE(review): presumably reuses a neighbour graph already stored in the
    # object instead of recomputing it — confirm against the pipeline code
    use_existing: True
    # dimensionality reduction to use for neighbours and umap (e.g. X_pca)
    dim_red: X_pca
    # how many components to use for clustering
    n_dim_red: 30
    # number of neighbours
    k: 30
    # metric: euclidean | cosine
    metric: euclidean
    # scanpy | hnsw (from scvelo)
    method: scanpy
  atac:
    # NOTE(review): presumably reuses a neighbour graph already stored in the
    # object instead of recomputing it — confirm against the pipeline code
    use_existing: True
    # dimensionality reduction to use for neighbours and umap (e.g. X_pca)
    dim_red: X_pca
    # how many components to use for clustering
    n_dim_red: 30
    # number of neighbours
    k: 30
    # metric: euclidean | cosine
    metric: euclidean
    # scanpy | hnsw (from scvelo)
    method: scanpy
# ---------------------------------------
# parameters for umap calculation
# ---------------------------------------
# UMAP settings: a global on/off switch plus, for each modality (and the
# multimodal representation), a list of mindist values.
umap:
  # set run to False if you are happy with the existing umap from integration
  run: True
  rna:
    mindist:
      - 0.25
      - 0.5
  prot:
    mindist:
      - 0.1
  atac:
    mindist:
      - 0.5
  multimodal:
    mindist:
      - 0.5
# UMAP reduced dimensions will be stored using the format
# ---------------------------------------
# parameters for clustering
# ---------------------------------------
# Clustering settings per modality: a list of resolutions and the
# clustering algorithm to use.
clusterspecs:
  rna:
    resolutions:
      - 0.2
      - 0.6
      - 1
    algorithm: leiden  # (louvain or leiden)
  prot:
    resolutions:
      - 0.2
      - 0.6
      - 1
    algorithm: leiden  # (louvain or leiden)
  atac:
    resolutions:
      - 0.2
      - 0.3
    algorithm: leiden  # (louvain or leiden)
  multimodal:
    resolutions:
      - 0.5
      - 0.7
    algorithm: leiden  # (louvain or leiden)
# ---------------------------------------
# parameters for finding marker genes
# ---------------------------------------
# Marker detection settings, one sub-section per modality.
# Where pseudo_seurat is set to False, we run
# https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html
# Where pseudo_seurat is set to True, a python implementation of
# Seurat::FindMarkers (written by CRG) is used.
markerspecs:
  rna:
    run: True
    layer: logged_counts
    # method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
    method: t-test_overestim_var
    # if a cluster contains less than n cells then do not bother doing
    # marker analysis
    mincells: 10
    pseudo_seurat: False
    # these next two settings do not matter unless pseudo_seurat is set to True.
    # If applicable look at Seurat documentation for FindMarkers for details
    minpct: 0.1
    threshuse: 0.25
  prot:
    # NOTE(review): left blank (parses as null) while rna and atac are set to
    # True — confirm whether prot markers are meant to be skipped
    run:
    layer: clr
    # if a cluster contains less than n cells then do not bother doing
    # marker analysis
    mincells: 10
    # method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
    method: wilcoxon
    pseudo_seurat: False
    # only used when pseudo_seurat is True (see Seurat FindMarkers)
    minpct: 0.1
    threshuse: 0.25
  atac:
    run: True
    layer: signac_norm
    # if a cluster contains less than n cells then do not bother doing
    # marker analysis
    mincells: 10
    # method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
    method: wilcoxon
    pseudo_seurat: False
    # only used when pseudo_seurat is True (see Seurat FindMarkers)
    minpct: 0.1
    threshuse: 0.25
  multimodal:
    # NOTE(review): unlike the other modalities, no `run` or `layer` key is
    # set here — confirm whether the pipeline expects them
    # if a cluster contains less than n cells then do not bother doing
    # marker analysis
    mincells: 10
    # method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
    method: wilcoxon
    pseudo_seurat: False
    # only used when pseudo_seurat is True (see Seurat FindMarkers)
    minpct: 0.1
    threshuse: 0.25
# ---------------------------------------
# plot specs are used to define which metadata columns are used in the visualisations
# ---------------------------------------
plotspecs:
  # layer to use for each modality; the values match the `layer` entries
  # set under markerspecs
  layers:
    rna: logged_counts
    prot: clr
    atac: signac_norm
  # NOTE(review): placement under plotspecs (rather than top level) is
  # assumed — confirm. Presumably the number of top markers per cluster to
  # include in plots.
  top_n_markers: 10