forked from sebgra/hicberg
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
85 lines (67 loc) · 2.63 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/bin/env snakemake -s
import re
from os.path import join

import numpy as np
import pandas as pd
from snakemake.utils import validate
# Prefix every shell command with strict-mode flags so that a failing
# command, an unset variable or a broken pipe stage aborts the job.
shell.prefix("set -euo pipefail;")

# Load the workflow configuration.
configfile: 'config/config.yaml'

# Read the sample sheet named by config['samples']: ';'-separated, every
# column kept as a string, lines starting with '#' ignored.
samples = pd.read_csv(config['samples'], sep=';', comment='#', dtype=str)
# Index rows by library while keeping 'library' available as a column.
samples = samples.set_index(['library'], drop=False)
# Input and output locations, taken from the configuration. The reference
# and FASTQ directories are resolved relative to config['base_dir'].
OUT_DIR = config['out_dir']
REF_DIR = join(config['base_dir'], config['ref_dir'])
FASTQ_DIR = join(config['base_dir'], config['fastq_dir'])

# Distinct (sorted) library and species names found in the sample sheet.
libraries = np.unique(samples['library'])
species = np.unique(samples['species'])

# Per-sample parameter columns, one entry per sample sheet row.
sampling_rates = samples['sampling_rates']
enzymes = samples['enzymes']
modes = samples['modes']
resolutions = samples['resolutions']
kernel_sizes = samples['kernel_sizes']
deviations = samples['deviations']
max_reports = samples['max_reports']
circularity = samples['circularity']
distances = samples['distances']
blacklists = samples['blacklists']
def _literal_alternation(values):
    """Return a regex alternation that matches exactly the given strings.

    Each value is passed through ``re.escape`` so that characters such as
    ``.``, ``+`` or ``|`` coming from the sample sheet are matched literally
    instead of being interpreted as regex syntax. Duplicates are dropped
    (first occurrence kept) so the pattern does not repeat one alternative
    per sample sheet row.

    Args:
        values: Iterable of strings (e.g. a sample sheet column).

    Returns:
        The escaped, de-duplicated values joined with ``|``.
    """
    return "|".join(re.escape(value) for value in dict.fromkeys(values))


# Restrict each wildcard to the values listed in the sample sheet.
wildcard_constraints:
    libraries = _literal_alternation(libraries),
    species = _literal_alternation(species),
    sampling_rates = _literal_alternation(sampling_rates),
    enzymes = _literal_alternation(enzymes),
    modes = _literal_alternation(modes),
    resolutions = _literal_alternation(resolutions),
    kernel_sizes = _literal_alternation(kernel_sizes),
    deviations = _literal_alternation(deviations),
    max_reports = _literal_alternation(max_reports),
    circularity = _literal_alternation(circularity),
    distances = _literal_alternation(distances),
    blacklists = _literal_alternation(blacklists)
########################################################################
########################### RULES PART #################################
########################################################################
# Thread count constant.
# NOTE(review): presumably read by the included rule files — confirm
# against rules/*.smk before renaming or moving it below the includes.
THREADS = 16

# Pipeline sub-workflows, included in step order. The shell strict-mode
# prefix is already set once near the top of this file, so it is not
# repeated here.
include: 'rules/00_hicberg_step_0.smk'
include: 'rules/01_hicberg_step_1.smk'
include: 'rules/02_hicberg_step_2.smk'
include: 'rules/03_hicberg_step_3.smk'
include: 'rules/04_hicberg_step_4.smk'
include: 'rules/05_hicberg_step_5.smk'
# Default target: request, for every library, at least one output of each
# rule (or one of its last outputs) so that the rule is executed.
rule all:
    input:
        # 'target' is listed first so the expansion iterates targets in the
        # outer loop and libraries in the inner loop.
        expand(
            join(OUT_DIR, '{libraries}', '{target}'),
            target=[
                "fragments_fixed_sizes.txt",
                "1.sorted.bam",
                "group1.1.bam",
                "unrescued_map.cool",
                "restriction_map.npy",
                join("contacts", "matrices", "rescued_map.cool"),
            ],
            libraries=libraries,
        ),