Skip to content

Commit 8b1569b

Browse files
authored
Merge pull request #145 from Irallia/MISC/small_corrections_2
[MISC, DOC] Style changes and some documentation
2 parents 97aef1c + e1eadcb commit 8b1569b

13 files changed

+153
-32
lines changed

Diff for: LICENSE.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
BSD 3-Clause License
1+
# BSD 3-Clause License
22

33
Copyright (c) 2020, Jörg Winkler
44
All rights reserved.

Diff for: README.md

+27-3
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,29 @@
44

55
The official repository for the iGenVar project.
66

7+
iGenVar is intended to be a caller for all types of genetic variation: SNPs, indels and larger structural variations
8+
(insertions, deletions, inversions, translocations, CNVs, nested SVs).
9+
It uses both Illumina short reads and PacBio long reads for this purpose.
10+
11+
David Heller in the Vingron lab of the MPI-MG and Tim White in the Kehr lab at BIH have both developed an SV caller for
12+
long read sequencing data. Instead of competing with each other, we want to join forces and combine the two tools, SVIM
13+
and SVIRL, into one better and more versatile tool.
14+
On the other hand, there were some tool developments in the Reinert Lab (FU Berlin): Vaquita, a short read SV caller,
15+
SViper, a refinement tool, and Vaquita-LR, a further development of Vaquita for long reads.
16+
We want to combine these approaches and use the SeqAn3 library as a basis for this new tool.
17+
18+
## Current status:
19+
20+
We can call insertions and deletions from long read data (SVIM methods implemented).
21+
For more information, see the release plan at the bottom of the page.
22+
723
## Installation
824

925
Instructions:
26+
1027
1. clone this repository: `git clone --recurse-submodules https://github.com/seqan/iGenVar.git`
11-
or `git clone https://github.com/seqan/iGenVar.git` and fetch the seqan3 submodule after cloning: `git submodule update --recursive --init`
28+
or `git clone https://github.com/seqan/iGenVar.git` and fetch the seqan3 submodule after cloning:
29+
`git submodule update --recursive --init`
1230
2. create a build directory and visit it: `mkdir build && cd build`
1331
3. run cmake: `cmake ../iGenVar`
1432
4. build the application: `make`
@@ -19,5 +37,11 @@ Instructions:
1937
(Built using the [SeqAn3 App Template](https://github.com/seqan/app-template))
2038

2139
We created small examples, which you can use to test our app:
22-
`./bin/iGenVar -i ./test/data/paired_end_short_read_mini_example.sam -j ./test/data/single_end_mini_example.sam `
23-
`-o ./test/data/output.vcf --method cigar_string --method split_read --min_var_length 5`
40+
```bash
41+
./bin/iGenVar -i ./test/data/paired_end_short_read_mini_example.sam -j ./test/data/single_end_mini_example.sam \
42+
-o ./test/data/output.vcf --method cigar_string --method split_read --min_var_length 5
43+
```
44+
45+
## Release plan:
46+
47+
<p align="center"><img height="500" src="https://github.com/seqan/iGenVar/raw/master/doc/ReleasePlan.png"></p>

Diff for: doc/ReleasePlan.png

527 KB
Loading

Diff for: include/modules/clustering/hierarchical_clustering_method.hpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,4 @@ int junction_distance(Junction const & lhs, Junction const & rhs);
4343
* \details For the algorithms we use the library hclust.
4444
* \see https://lionel.kr.hs-niederrhein.de/~dalitz/data/hclust/ (last access 01.06.2021).
4545
*/
46-
std::vector<Cluster> hierarchical_clustering_method(std::vector<Junction> const & junctions,
47-
double clustering_cutoff);
46+
std::vector<Cluster> hierarchical_clustering_method(std::vector<Junction> const & junctions, double clustering_cutoff);

Diff for: src/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ add_library ("${PROJECT_NAME}_lib" STATIC modules/clustering/hierarchical_cluste
55
modules/clustering/simple_clustering_method.cpp
66
modules/sv_detection_methods/analyze_cigar_method.cpp
77
modules/sv_detection_methods/analyze_read_pair_method.cpp
8-
modules/sv_detection_methods/analyze_sa_tag_method.cpp
8+
modules/sv_detection_methods/analyze_split_read_method.cpp
99
structures/aligned_segment.cpp
1010
structures/breakend.cpp
1111
structures/cluster.cpp

Diff for: src/modules/clustering/hierarchical_clustering_method.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,7 @@ inline std::vector<Junction> subsample_partition(std::vector<Junction> const & p
116116
return subsample;
117117
}
118118

119-
std::vector<Cluster> hierarchical_clustering_method(std::vector<Junction> const & junctions,
120-
double clustering_cutoff)
119+
std::vector<Cluster> hierarchical_clustering_method(std::vector<Junction> const & junctions, double clustering_cutoff)
121120
{
122121
auto partitions = partition_junctions(junctions);
123122
std::vector<Cluster> clusters{};

Diff for: src/modules/sv_detection_methods/analyze_sa_tag_method.cpp renamed to src/modules/sv_detection_methods/analyze_split_read_method.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include "modules/sv_detection_methods/analyze_sa_tag_method.hpp"
1+
#include "modules/sv_detection_methods/analyze_split_read_method.hpp"
22

33
#include <seqan3/core/debug_stream.hpp>
44

@@ -97,7 +97,7 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
9797
// map to different reference sequences (e.g. translocation, interspersed duplication),
9898
// have a large distance on the reference (e.g. deletion, inversion, tandem duplication), or
9999
// have a large distance on the read (e.g. insertion)
100-
if (current.ref_name != next.ref_name ||
100+
if (current.ref_name != next.ref_name || //TODO / QUESTION (irallia 23.06.2021): What about translocation on the same ref?
101101
std::abs(distance_on_ref) >= min_length ||
102102
distance_on_read >= min_length)
103103
{
@@ -114,7 +114,8 @@ void analyze_aligned_segments(std::vector<AlignedSegment> const & aligned_segmen
114114
}
115115
else
116116
{
117-
auto inserted_bases = query_sequence | seqan3::views::slice(current.get_query_end(), next.get_query_start());
117+
auto inserted_bases = query_sequence | seqan3::views::slice(current.get_query_end(),
118+
next.get_query_start());
118119
junctions.emplace_back(mate1, mate2, inserted_bases, read_name);
119120
}
120121
seqan3::debug_stream << "BND: " << junctions.back() << "\n";
@@ -139,6 +140,7 @@ void analyze_sa_tag(std::string const & query_name,
139140
strand strand = (hasFlagReverseComplement(flag) ? strand::reverse : strand::forward);
140141
aligned_segments.push_back(AlignedSegment{strand, ref_name, pos, mapq, cigar});
141142
retrieve_aligned_segments(sa_tag, aligned_segments);
143+
// sort by query start, query end, mapping quality (in this order):
142144
std::sort(aligned_segments.begin(), aligned_segments.end());
143145
analyze_aligned_segments(aligned_segments,
144146
junctions,

Diff for: src/variant_detection/method_enums.cpp

+18-12
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
std::unordered_map<std::string, detection_methods> enumeration_names(detection_methods)
44
{
5+
// LCOV_EXCL_START
56
return std::unordered_map<std::string, detection_methods>{{"0", detection_methods::cigar_string},
67
{"cigar_string", detection_methods::cigar_string},
78
{"1", detection_methods::split_read},
@@ -10,27 +11,31 @@ return std::unordered_map<std::string, detection_methods>{{"0", detection_method
1011
{"read_pairs", detection_methods::read_pairs},
1112
{"3", detection_methods::read_depth},
1213
{"read_depth", detection_methods::read_depth}};
14+
// LCOV_EXCL_STOP
1315
};
1416

1517
std::unordered_map<std::string, clustering_methods> enumeration_names(clustering_methods)
1618
{
19+
// LCOV_EXCL_START
1720
return std::unordered_map<std::string,
18-
clustering_methods>{{"0", clustering_methods::simple_clustering},
19-
{"simple_clustering",
20-
clustering_methods::simple_clustering},
21-
{"1", clustering_methods::hierarchical_clustering},
22-
{"hierarchical_clustering",
23-
clustering_methods::hierarchical_clustering},
24-
{"2", clustering_methods::self_balancing_binary_tree},
25-
{"self_balancing_binary_tree",
26-
clustering_methods::self_balancing_binary_tree},
27-
{"3", clustering_methods::candidate_selection_based_on_voting},
28-
{"candidate_selection_based_on_voting",
29-
clustering_methods::candidate_selection_based_on_voting}};
21+
clustering_methods>{{"0", clustering_methods::simple_clustering},
22+
{"simple_clustering",
23+
clustering_methods::simple_clustering},
24+
{"1", clustering_methods::hierarchical_clustering},
25+
{"hierarchical_clustering",
26+
clustering_methods::hierarchical_clustering},
27+
{"2", clustering_methods::self_balancing_binary_tree},
28+
{"self_balancing_binary_tree",
29+
clustering_methods::self_balancing_binary_tree},
30+
{"3", clustering_methods::candidate_selection_based_on_voting},
31+
{"candidate_selection_based_on_voting",
32+
clustering_methods::candidate_selection_based_on_voting}};
33+
// LCOV_EXCL_STOP
3034
};
3135

3236
std::unordered_map<std::string, refinement_methods> enumeration_names(refinement_methods)
3337
{
38+
// LCOV_EXCL_START
3439
return std::unordered_map<std::string, refinement_methods>{{"0", refinement_methods::no_refinement},
3540
{"no_refinement",
3641
refinement_methods::no_refinement},
@@ -40,4 +45,5 @@ return std::unordered_map<std::string, refinement_methods>{{"0", refinement_meth
4045
{"2", refinement_methods::sVirl_refinement_method},
4146
{"sVirl_refinement_method",
4247
refinement_methods::sVirl_refinement_method}};
48+
// LCOV_EXCL_STOP
4349
};

Diff for: src/variant_detection/variant_detection.cpp

+5-6
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
#include <seqan3/core/debug_stream.hpp>
44
#include <seqan3/io/sam_file/input.hpp> // SAM/BAM support (seqan3::sam_file_input)
55

6-
#include "modules/sv_detection_methods/analyze_cigar_method.hpp" // for the split read method
7-
#include "modules/sv_detection_methods/analyze_read_pair_method.hpp"// for the read pair method
8-
#include "modules/sv_detection_methods/analyze_sa_tag_method.hpp" // for the cigar string method
9-
#include "variant_detection/bam_functions.hpp" // for hasFlag* functions
6+
#include "modules/sv_detection_methods/analyze_cigar_method.hpp" // for the cigar string method
7+
#include "modules/sv_detection_methods/analyze_read_pair_method.hpp" // for the read pair method
8+
#include "modules/sv_detection_methods/analyze_split_read_method.hpp" // for the split read method
9+
#include "variant_detection/bam_functions.hpp" // for hasFlag* functions
1010

1111
using seqan3::operator""_tag;
1212

@@ -167,8 +167,7 @@ void detect_junctions_in_long_reads_sam_file(std::vector<Junction> & junctions,
167167
}
168168
}
169169
break;
170-
case detection_methods::read_pairs: // There are no read pairs in long reads.
171-
break;
170+
// There are no read pairs in long reads.
172171
case detection_methods::read_depth: // Detect junctions from read depth evidence
173172
seqan3::debug_stream << "The read depth method for long reads is not yet implemented.\n";
174173
break;

Diff for: test/api/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,5 @@ add_api_test (detection_test.cpp)
88
add_api_test (clustering_test.cpp)
99

1010
# add_api_test (refinement_test.cpp)
11+
12+
add_api_test (structures_test.cpp)

Diff for: test/api/detection_test.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
#include <seqan3/alphabet/cigar/cigar.hpp>
44
#include <seqan3/io/sam_file/sam_flag.hpp>
55

6-
#include "modules/sv_detection_methods/analyze_cigar_method.hpp" // for the split read method
7-
#include "modules/sv_detection_methods/analyze_sa_tag_method.hpp" // for the cigar string method
6+
#include "modules/sv_detection_methods/analyze_cigar_method.hpp" // for the cigar string method
7+
#include "modules/sv_detection_methods/analyze_split_read_method.hpp" // for the split read method
88

99
using seqan3::operator""_cigar_operation;
1010
using seqan3::operator""_dna5;

Diff for: test/api/structures_test.cpp

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#include <gtest/gtest.h>
2+
3+
#include "structures/breakend.hpp"
4+
#include "variant_detection/method_enums.hpp"
5+
6+
/* tests for method_enums */
7+
8+
TEST(structures, method_enums_detection_methods)
9+
{
10+
std::unordered_map<std::string,
11+
detection_methods> cigar_string_mapping = enumeration_names(detection_methods::cigar_string);
12+
EXPECT_EQ(detection_methods::cigar_string, cigar_string_mapping["cigar_string"]);
13+
EXPECT_EQ(detection_methods::cigar_string, cigar_string_mapping["0"]);
14+
15+
std::unordered_map<std::string,
16+
detection_methods> split_read_mapping = enumeration_names(detection_methods::split_read);
17+
EXPECT_EQ(detection_methods::split_read, split_read_mapping["split_read"]);
18+
EXPECT_EQ(detection_methods::split_read, split_read_mapping["1"]);
19+
20+
std::unordered_map<std::string,
21+
detection_methods> read_pairs_mapping = enumeration_names(detection_methods::read_pairs);
22+
EXPECT_EQ(detection_methods::read_pairs, read_pairs_mapping["read_pairs"]);
23+
EXPECT_EQ(detection_methods::read_pairs, read_pairs_mapping["2"]);
24+
25+
std::unordered_map<std::string,
26+
detection_methods> read_depth_mapping = enumeration_names(detection_methods::read_depth);
27+
EXPECT_EQ(detection_methods::read_depth, read_depth_mapping["read_depth"]);
28+
EXPECT_EQ(detection_methods::read_depth, read_depth_mapping["3"]);
29+
}
30+
31+
TEST(structures, method_enums_clustering_methods)
32+
{
33+
std::unordered_map<std::string, clustering_methods> simple_clustering_mapping
34+
= enumeration_names(clustering_methods::simple_clustering);
35+
EXPECT_EQ(clustering_methods::simple_clustering, simple_clustering_mapping["simple_clustering"]);
36+
EXPECT_EQ(clustering_methods::simple_clustering, simple_clustering_mapping["0"]);
37+
38+
std::unordered_map<std::string, clustering_methods> hierarchical_clustering_mapping
39+
= enumeration_names(clustering_methods::hierarchical_clustering);
40+
EXPECT_EQ(clustering_methods::hierarchical_clustering, hierarchical_clustering_mapping["hierarchical_clustering"]);
41+
EXPECT_EQ(clustering_methods::hierarchical_clustering, hierarchical_clustering_mapping["1"]);
42+
43+
std::unordered_map<std::string, clustering_methods> self_balancing_binary_tree_mapping
44+
= enumeration_names(clustering_methods::self_balancing_binary_tree);
45+
EXPECT_EQ(clustering_methods::self_balancing_binary_tree,
46+
self_balancing_binary_tree_mapping["self_balancing_binary_tree"]);
47+
EXPECT_EQ(clustering_methods::self_balancing_binary_tree, self_balancing_binary_tree_mapping["2"]);
48+
49+
std::unordered_map<std::string, clustering_methods> candidate_selection_based_on_voting_mapping
50+
= enumeration_names(clustering_methods::candidate_selection_based_on_voting);
51+
EXPECT_EQ(clustering_methods::candidate_selection_based_on_voting,
52+
candidate_selection_based_on_voting_mapping["candidate_selection_based_on_voting"]);
53+
EXPECT_EQ(clustering_methods::candidate_selection_based_on_voting,
54+
candidate_selection_based_on_voting_mapping["3"]);
55+
}
56+
57+
TEST(structures, method_enums_refinement_methods)
58+
{
59+
std::unordered_map<std::string, refinement_methods> no_refinement_mapping
60+
= enumeration_names(refinement_methods::no_refinement);
61+
EXPECT_EQ(refinement_methods::no_refinement, no_refinement_mapping["no_refinement"]);
62+
EXPECT_EQ(refinement_methods::no_refinement, no_refinement_mapping["0"]);
63+
64+
std::unordered_map<std::string, refinement_methods> sViper_refinement_method_mapping
65+
= enumeration_names(refinement_methods::sViper_refinement_method);
66+
EXPECT_EQ(refinement_methods::sViper_refinement_method,
67+
sViper_refinement_method_mapping["sViper_refinement_method"]);
68+
EXPECT_EQ(refinement_methods::sViper_refinement_method, sViper_refinement_method_mapping["1"]);
69+
70+
std::unordered_map<std::string, refinement_methods> sVirl_refinement_method_mapping
71+
= enumeration_names(refinement_methods::sVirl_refinement_method);
72+
EXPECT_EQ(refinement_methods::sVirl_refinement_method, sVirl_refinement_method_mapping["sVirl_refinement_method"]);
73+
EXPECT_EQ(refinement_methods::sVirl_refinement_method, sVirl_refinement_method_mapping["2"]);
74+
}
75+
76+
/* tests for breakends */
77+
78+
TEST(structures, breakend_flip_orientation)
79+
{
80+
Breakend forward_breakend{"chr1", 42, strand::forward};
81+
Breakend reverse_breakend{"chr1", 42, strand::reverse};
82+
83+
EXPECT_NE(forward_breakend, reverse_breakend);
84+
forward_breakend.flip_orientation();
85+
EXPECT_EQ(forward_breakend, reverse_breakend); // both are reverse now
86+
reverse_breakend.flip_orientation();
87+
EXPECT_NE(forward_breakend, reverse_breakend);
88+
forward_breakend.flip_orientation();
89+
EXPECT_EQ(forward_breakend, reverse_breakend); // both are forward now
90+
}

0 commit comments

Comments
 (0)