Skip to content

Commit 88639df

Browse files
author
nservant
committed
fix minor issues in codes and add hicpro2higlass utils
1 parent 1500c66 commit 88639df

File tree

6 files changed

+198
-9
lines changed

6 files changed

+198
-9
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ readstrimming: $(INST_SOURCES)/cutsite_trimming.cpp
5353

5454
## Build Python lib
5555
iced: $(INST_SOURCES)/ice_mod
56-
(cp $(INST_SOURCES)/ice_mod/iced/scripts/ice ${INST_SCRIPTS}; cd $(INST_SOURCES)/ice_mod/; ${PYTHON_PATH}/python setup.py install --user;)
56+
(cp $(INST_SOURCES)/ice_mod/iced/scripts/ice ${INST_SCRIPTS}; cd $(INST_SOURCES)/ice_mod/; ${PYTHON_PATH}/python setup.py install;)
5757

5858
test: config_check
5959
@echo ${PYTHON_PATH}

NEWS

+11-1
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,26 @@ CHANGES IN VERSION 2.10.0
33

44
NEW FEATURES
55

6+
o New utility - hicpro2higlass.sh to convert HiC-Pro output into higlass .cool files
7+
8+
o HiC-Pro is now available as a Singularity container !
9+
610
o hicpro2juicebox.sh utility now supports alof HiC-Pro format (< 2.7.5)
711

812
SIGNIFICANT USER-VISIBLE CHANGES
913

10-
o udpate R scripts to be compatible with the lastest ggplot2 version (>2.2.1)
14+
o update R scripts to be compatible with the latest ggplot2 version (>2.2.1) and fix graphical bugs in quality controls
1115

1216
o add new checks on input files and configuration files
1317

1418
BUG FIXES
1519

20+
o Remove the --user option during iced installation
21+
22+
o R sessions are no longer saved and restored
23+
24+
o hicpro2fithic - bug fix when no -o option specified
25+
1626
o Fix bug to avoid floating values in valid pair positions
1727

1828
o Fix bug in order of samtools sort parameter in bowtie_combine.sh

README.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ How to install it ?
3838
The HiC-Pro pipeline requires the following dependencies :
3939

4040
* The `bowtie2 <http://bowtie-bio.sourceforge.net/bowtie2/index.shtml>`_ mapper
41-
* Python (>2.7) with *pysam (>=0.8.3)*, *bx(>=0.5.0)*, *numpy(>=1.8.2)*, and *scipy(>=0.15.1)* libraries
41+
* Python (>2.7) with *pysam (>=0.8.3)*, *bx-python(>=0.5.0)*, *numpy(>=1.8.2)*, and *scipy(>=0.15.1)* libraries
4242
* R with the *RColorBrewer* and *ggplot2* packages
4343
* g++ compiler
4444
* Samtools (>1.0)

bin/utils/hicpro2fithic.py

100644100755
+2-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
# Modified by Nicolas Servant - 1/23/2017
1111
# Modified by Ferhat Ay - 6/5/2017 - added resolution (-r <res>) argument to avoid some problems with inferring it from the first entry of bedFile
1212
# Modified by Ferhat Ay - 6/7/2017 - hitCount to ints, and gzip output
13+
# Modified by Arya Kaul - 12/21/2017 - bug fix when no -o option specified
1314

1415

1516
def outputfithicform(bedPath, matrixPath, intCPath, fragMapPath, biasVectorPath=None, biasVectorOutput=None,res=0):
@@ -94,7 +95,7 @@ def main():
9495
parser.add_argument("-i", "--matrix", help="Input matrix file with raw contact frequencies.", required=True)
9596
parser.add_argument("-b", "--bed", help="BED file with bins coordinates.", required=True)
9697
parser.add_argument("-s", "--bias", help="The bias file provided after IC normalization.", default=None)
97-
parser.add_argument("-o", "--output", help="Output path", default="./")
98+
parser.add_argument("-o", "--output", help="Output path", default=".")
9899
parser.add_argument("-r", "--resolution", help="Resolution of the matrix", type=int, default=0) # 0 means it is inferred from fragments file's first entry
99100

100101
args = parser.parse_args()

bin/utils/hicpro2higlass.sh

+178
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
#!/bin/bash
2+
3+
## HiC-Pro
4+
## Copyleft 2017 Institut Curie
5+
## Author(s): Nicolas Servant
6+
## Contact: [email protected]
7+
## This software is distributed without any guarantee under the terms of the BSD licence
8+
9+
##
10+
## First version of converter between HiCPro and higlass.
11+
## The cooler python package should be properly installed, as well as the higlass software
12+
##
13+
14+
## Print the one-line synopsis followed by a pointer to the detailed help.
## (the -b BINS option is intentionally not advertised: ## [-b BINS])
function usage {
    printf '%s\n' \
        "usage : hicpro2higlass -i INPUT -r RESOLUTION -c CHROMSIZE [-n] [-h]" \
        "Use option -h|--help for more information"
}
18+
19+
## Print the synopsis (through usage) and the description of every option,
## then terminate the script.
function help {
    usage
    # Not advertised for now:
    # " -b|--bed BINS : bed file generated by HiC-Pro with intervals coordinates (require if input is a .matrix file)"
    printf '%s\n' \
        "" \
        "Generate Higlass input file from HiC-Pro results" \
        "See https://github.com/hms-dbmi/higlass-website for details about Higlass" \
        "---------------" \
        "OPTIONS" \
        "" \
        " -i|--input INPUT : allValidPairs or matrix file generated by HiC-Pro" \
        " -r|--res RESOLUTION : .matrix file resolution or maximum resolution to reach from the .allValidPairs input file" \
        " -c|--chrom CHROMSIZE : chromosome size file" \
        " [-n|--norm] : run cooler matrix balancing algorithm" \
        " [-h|--help]: help"
    exit
}
35+
36+
37+
## Nothing on the command line: show the synopsis and leave.
if (( $# == 0 )); then
    usage
    exit
fi
42+
43+
# Transform long options to short ones
# The getopts loop further down only declares single-letter flags, so every
# known --word is swapped for its one-letter equivalent; any other argument
# is kept untouched and in its original position.
translated_args=()
for word in "$@"; do
    case "$word" in
        --input) translated_args+=("-i") ;;
        --bed)   translated_args+=("-b") ;;
        --res)   translated_args+=("-r") ;;
        --chrom) translated_args+=("-c") ;;
        --norm)  translated_args+=("-n") ;;
        --help)  translated_args+=("-h") ;;
        *)       translated_args+=("$word") ;;
    esac
done
set -- "${translated_args[@]}"
56+
57+
## Default settings; each one can be overridden from the command line
RES=10000
NORMALIZE=0
INPUT_HICPRO=""
INPUT_BED=""
CHROMSIZES_FILE=""

## The leading ':' of the optstring selects getopts' silent mode: an unknown
## flag is reported as '?' and a missing argument as ':', with the offending
## letter available in OPTARG.
while getopts ":i:b:c:r:nh" OPT; do
    case "$OPT" in
        h) help ;;
        n) NORMALIZE=1 ;;
        r) RES=$OPTARG ;;
        c) CHROMSIZES_FILE=$OPTARG ;;
        b) INPUT_BED=$OPTARG ;;
        i) INPUT_HICPRO=$OPTARG ;;
        \? | :)
            if [[ $OPT == ":" ]]; then
                echo "Option -$OPTARG requires an argument." >&2
            else
                echo "Invalid option: -$OPTARG" >&2
            fi
            usage
            exit 1
            ;;
    esac
done
84+
85+
## Mandatory arguments.
## -i is the HiC-Pro file to convert and -c the chromosome size file handed to
## 'cooler makebins' for both input types; without either of them nothing can
## be generated, so this is reported as an error (non-zero exit status).
if [[ -z "$INPUT_HICPRO" ]]; then
    echo "Exit. Input file (-i) is required." >&2
    usage
    exit 1
fi

if [[ -z "$CHROMSIZES_FILE" ]]; then
    echo "Exit. Chromosome size file (-c) is required." >&2
    usage
    exit 1
fi

## Fail early, before any external tool is started, when a file cannot be read
for required_file in "$INPUT_HICPRO" "$CHROMSIZES_FILE"; do
    if [[ ! -r "$required_file" ]]; then
        echo "Exit. Cannot read file '$required_file'." >&2
        exit 1
    fi
done
90+
91+
## Detect input data type from the file name suffix:
##   *.matrix        -> MATRIX (HiC-Pro contact map)
##   *allValidPairs  -> VALID  (HiC-Pro list of valid pairs)
## Note: a BED file (-b) is no longer demanded for .matrix inputs. The option
## is not advertised by usage/help and INPUT_BED is never read afterwards (the
## bins come from 'cooler makebins'), so the former check could only reject
## otherwise valid command lines. -b is still accepted for compatibility.
DATATYPE=""
case "$INPUT_HICPRO" in
    *.matrix)
        DATATYPE="MATRIX"
        ;;
    *allValidPairs)
        DATATYPE="VALID"
        ;;
    *)
        echo -e "Unknown input data type. Expect .matrix or _allValidPairs input files."
        exit 1
        ;;
esac
echo -e "$DATATYPE input file detected ..."
107+
108+
## Return 0 when the dotted version $1 is strictly older than $2.
## Fields are compared numerically one by one ("0.10.0" is newer than "0.7.6",
## which a plain string comparison gets wrong). Missing fields count as 0 and
## anything following the leading digits of a field (e.g. "0rc1") is ignored.
function version_lt {
    local IFS=.
    local -a have=($1) want=($2)
    local i h w
    for ((i = 0; i < ${#have[@]} || i < ${#want[@]}; i++)); do
        h=${have[i]:-0}
        h=${h%%[!0-9]*}
        w=${want[i]:-0}
        w=${w%%[!0-9]*}
        # 10# forces base 10 so that a field such as "08" is not read as octal
        if ((10#${h:-0} < 10#${w:-0})); then
            return 0
        elif ((10#${h:-0} > 10#${w:-0})); then
            return 1
        fi
    done
    return 1
}

## Check cooler version
if ! command -v cooler > /dev/null; then
    # \$PATH is escaped on purpose: the message names the variable, it must
    # not dump its content
    echo -e "Cooler is not installed or is not in your \$PATH. See https://github.com/mirnylab/cooler for details."
    exit 1
fi

## The version is taken as the last word printed by 'cooler --version'
COOLER_VERSION=$(cooler --version 2>&1 | awk '{print $NF}')
echo "Cooler version $COOLER_VERSION detected ..."
if version_lt "$COOLER_VERSION" "0.7.6"; then
    echo "Cooler version must be >= 0.7.6 ! Stop."
    exit 1
fi

## pairix is only needed to load the sorted valid pairs ('cooler cload pairix')
if [[ $DATATYPE == "VALID" ]]; then
    if ! command -v pairix > /dev/null; then
        echo -e "Pairix is not installed or is not in your \$PATH. See https://github.com/4dn-dcic/pairix."
        exit 1
    fi
fi
128+
129+
## Report a fatal error on stderr and stop the script
function die {
    echo -e "$1" >&2
    exit 1
}

## Run 'cooler zoomify' on the .cool file $1; the matrix balancing is only
## left enabled when normalization (-n) was requested
function zoomify_cool {
    echo -e "\nZoomify .cool file ..."
    if [[ $NORMALIZE == 1 ]]; then
        cooler zoomify "$1"
    else
        cooler zoomify --no-balance "$1"
    fi
}

echo -e "\nGenerating .cool files ..."
## Scratch directory for the intermediate files, made unique with the shell PID
tmp_dir=./_tmp$$
mkdir -p "$tmp_dir" || die "Cannot create temporary directory $tmp_dir"

## Output names are built from the input basename by stripping the known
## suffix only (a pattern substitution could hit an earlier part of the name,
## e.g. "my_matrix_run.matrix")
in_base=$(basename -- "$INPUT_HICPRO")

if [[ $DATATYPE == "MATRIX" ]]; then
    stem=${in_base%.matrix}
    out="${stem}.cool"

    cooler makebins "$CHROMSIZES_FILE" "$RES" > "$tmp_dir/bins.bed" \
        || die "cooler makebins failed"
    cooler load -f coo --one-based "$tmp_dir/bins.bed" "$INPUT_HICPRO" "$out" \
        || die "cooler load failed"

    zoomify_cool "$out" || die "cooler zoomify failed"
    out="${stem}.mcool"

elif [[ $DATATYPE == "VALID" ]]; then
    stem=${in_base%allValidPairs}
    # drop the separator in front of the suffix ("sample_allValidPairs")
    stem=${stem%[._]}
    out="${stem}.cool"

    ## Keep columns 2-7 of the valid pairs (chr1 pos1 strand1 chr2 pos2
    ## strand2, matching the csort column options below) and append a count
    ## of 1. Strands are recoded +/- -> 1/16 on the two strand fields only,
    ## so that chromosome names containing '+' or '-' are left intact.
    awk 'BEGIN { OFS = "\t" }
         function recode(s) { return (s == "+") ? 1 : ((s == "-") ? 16 : s) }
         { print $2, $3, recode($4), $5, $6, recode($7), 1 }' "$INPUT_HICPRO" \
        > "$tmp_dir/contacts.txt" \
        || die "Cannot extract contacts from $INPUT_HICPRO"

    ## The sorted file must land in $tmp_dir: it is read back from there by
    ## 'cooler cload pairix' just below
    cooler csort --nproc 2 -c1 1 -p1 2 -s1 3 -c2 4 -p2 5 -s2 6 \
        -o "$tmp_dir/contacts.sorted.txt.gz" \
        "$tmp_dir/contacts.txt" \
        "$CHROMSIZES_FILE" \
        || die "cooler csort failed"

    cooler makebins "$CHROMSIZES_FILE" "$RES" > "$tmp_dir/bins.bed" \
        || die "cooler makebins failed"
    cooler cload pairix "$tmp_dir/bins.bed" "$tmp_dir/contacts.sorted.txt.gz" "$out" \
        || die "cooler cload failed"

    zoomify_cool "$out" || die "cooler zoomify failed"
    out="${stem}.mcool"
fi
169+
170+
## clean
## Remove the scratch directory holding the intermediate files. It is a
## directory, hence -r; the guard makes sure an unset or empty tmp_dir can
## never turn this into a removal of something else.
if [[ -n "${tmp_dir:-}" && -d "$tmp_dir" ]]; then
    /bin/rm -rf -- "$tmp_dir"
fi

## Tell the user how to register the generated file in Higlass
echo -e "\nCooler file generated with success ..."
echo "Please copy the file $out in your Higlass input directory and run :"
echo "docker exec higlass-container python higlass-server/manage.py ingest_tileset --filename /tmp/$out --datatype matrix --filetype cooler"
176+
177+
178+

scripts/make_plots.sh

+5-5
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ if [ -d ${BOWTIE2_FINAL_OUTPUT_DIR} ]; then
6464
nb=$(find $MAPPING_DIR -name "*.mapstat" | wc -l)
6565
if [[ $nb > 0 ]]; then
6666
echo "Quality checks - Mapping results ..."
67-
cmd="${R_PATH}/R --no-save --no-restore CMD BATCH \"--args picDir='${PIC_DIR}' bwtDir='${MAPPING_DIR}' sampleName='${RES_FILE_NAME}' r1tag='${PAIR1_EXT}' r2tag='${PAIR2_EXT}'\" ${SCRIPTS}/plot_mapping_portion.R ${LDIR}/plot_mapping_portion.Rout"
67+
cmd="${R_PATH}/R CMD BATCH --no-save --no-restore \"--args picDir='${PIC_DIR}' bwtDir='${MAPPING_DIR}' sampleName='${RES_FILE_NAME}' r1tag='${PAIR1_EXT}' r2tag='${PAIR2_EXT}'\" ${SCRIPTS}/plot_mapping_portion.R ${LDIR}/plot_mapping_portion.Rout"
6868
exec_cmd $cmd
6969
fi
7070
fi
@@ -75,7 +75,7 @@ if [ -d ${BOWTIE2_FINAL_OUTPUT_DIR} ]; then
7575
nb=$(find $MAPPING_DIR -name "*.pairstat" | wc -l)
7676
if [[ $nb > 0 ]]; then
7777
echo "Quality Cheks - Pairing results ..."
78-
cmd="${R_PATH}/R --no-save --no-restore CMD BATCH \"--args picDir='${PIC_DIR}' bwtDir='${MAPPING_DIR}' sampleName='${RES_FILE_NAME}' rmMulti='${RM_MULTI}' rmSingle='${RM_SINGLETON}'\" ${SCRIPTS}/plot_pairing_portion.R ${LDIR}/plot_pairing_portion.Rout"
78+
cmd="${R_PATH}/R CMD BATCH --no-save --no-restore \"--args picDir='${PIC_DIR}' bwtDir='${MAPPING_DIR}' sampleName='${RES_FILE_NAME}' rmMulti='${RM_MULTI}' rmSingle='${RM_SINGLETON}'\" ${SCRIPTS}/plot_pairing_portion.R ${LDIR}/plot_pairing_portion.Rout"
7979
exec_cmd $cmd
8080
fi
8181
fi
@@ -103,7 +103,7 @@ if [ -d ${DATA_DIR} ]; then
103103
nb=$(find $DATA_DIR -name "*.RSstat" | wc -l)
104104
if [[ $nb > 0 ]]; then
105105
echo "Quality checks - Hi-C processing ..."
106-
cmd="${R_PATH}/R --no-save --no-restore CMD BATCH \"--args picDir='${PIC_DIR}' hicDir='${DATA_DIR}' sampleName='${RES_FILE_NAME}'\" ${SCRIPTS}/plot_hic_fragment.R ${LDIR}/plot_hic_fragment.Rout"
106+
cmd="${R_PATH}/R CMD BATCH --no-save --no-restore \"--args picDir='${PIC_DIR}' hicDir='${DATA_DIR}' sampleName='${RES_FILE_NAME}'\" ${SCRIPTS}/plot_hic_fragment.R ${LDIR}/plot_hic_fragment.Rout"
107107
exec_cmd $cmd
108108
fi
109109
fi
@@ -114,9 +114,9 @@ if [ -d ${DATA_DIR} ]; then
114114
nb=$(find $DATA_DIR -name "*.mergestat" | wc -l)
115115
if [[ $nb > 0 ]]; then
116116
echo "Quality checks - Hi-C contact maps ..."
117-
cmd="${R_PATH}/R --no-save --no-restore CMD BATCH \"--args picDir='${PIC_DIR}' hicDir='${DATA_DIR}' sampleName='${RES_FILE_NAME}'\" ${SCRIPTS}/plot_hic_contacts.R ${LDIR}/plot_hic_contacts.Rout"
117+
cmd="${R_PATH}/R CMD BATCH --no-save --no-restore \"--args picDir='${PIC_DIR}' hicDir='${DATA_DIR}' sampleName='${RES_FILE_NAME}'\" ${SCRIPTS}/plot_hic_contacts.R ${LDIR}/plot_hic_contacts.Rout"
118118
exec_cmd $cmd
119119
fi
120120
fi
121121
done
122-
fi
122+
fi

0 commit comments

Comments
 (0)