Skip to content

Commit

Permalink
Merge pull request #223 from Irallia/FIX/readd_removed_if
Browse files Browse the repository at this point in the history
[FIX] Readd removed if case
  • Loading branch information
Irallia authored Jul 26, 2022
2 parents f52c733 + 3cd2375 commit 344ccb4
Show file tree
Hide file tree
Showing 11 changed files with 67 additions and 70 deletions.
Binary file removed doc/plots/iGenVar_only-results.DUP_as_INS.all.png
Binary file not shown.
Binary file removed doc/plots/iGenVar_only-results.all.png
Binary file not shown.
Binary file added doc/plots/iGenVar_only/results.all.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
117 changes: 60 additions & 57 deletions src/variant_detection/variant_output.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,72 +72,75 @@ void write_record(Cluster const & cluster,

if (mate1.seq_name == mate2.seq_name)
{
size_t const insert_size = cluster.get_average_inserted_sequence_size();
int const distance = mate2.position - mate1.position - 1;
int sv_length{};
int sv_length_iGenVar{};
std::string sv_type;

// Tandem Duplication
// In case of a small deletion inside of a duplication, the distance is a small positive value
if (cluster.get_common_tandem_dup_count() > 0 && static_cast<uint64_t>(distance) <= args.max_tol_deleted_length)
if (mate1.orientation == strand::forward)
{
record.alt() = {"<DUP:TANDEM>"};
// Increment end by 1 because VCF is 1-based
record.info().push_back({.id = "END", .value = mate2.position + 1});
sv_length = distance + 2;
sv_length_iGenVar = insert_size;
sv_type = "DUP";
}
// Deletion OR Inversion
else if (distance > 0)
{
// Inversion
// An Inversion consists of 2 Breakpoints, thus it looks like a deletion with an inserted sequence
if (insert_size >= args.min_var_length)
size_t const insert_size = cluster.get_average_inserted_sequence_size();
int const distance = mate2.position - mate1.position - 1;
int sv_length{};
int sv_length_iGenVar{};
std::string sv_type;

// Tandem Duplication
// In case of a small deletion inside of a duplication, the distance is a small positive value
if (cluster.get_common_tandem_dup_count() > 0 && static_cast<uint64_t>(distance) <= args.max_tol_deleted_length)
{
// Increment position by 1 because INV mate1 points on its last element
record.pos() += 1;
record.alt() = {"<INV>"};
record.alt() = {"<DUP:TANDEM>"};
// Increment end by 1 because VCF is 1-based
// Increment end by 1 because inversion ends one base before mate2 begins
record.info().push_back({.id = "END", .value = mate2.position + 1});
sv_length = distance;
sv_length_iGenVar = sv_length;
sv_type = "INV";
sv_length = distance + 2;
sv_length_iGenVar = insert_size;
sv_type = "DUP";
}
// Deletion
// In case of a small insertion inside of an deletion, the insert_size is a small positive value.
else if (insert_size <= args.max_tol_inserted_length)
// Deletion OR Inversion
else if (distance > 0)
{
record.alt() = {"<DEL>"};
// Inversion
// An Inversion consists of 2 Breakpoints, thus it looks like a deletion with an inserted sequence
if (insert_size >= args.min_var_length)
{
// Increment position by 1 because INV mate1 points on its last element
record.pos() += 1;
record.alt() = {"<INV>"};
// Increment end by 1 because VCF is 1-based
// Increment end by 1 because inversion ends one base before mate2 begins
record.info().push_back({.id = "END", .value = mate2.position + 1});
sv_length = distance;
sv_length_iGenVar = sv_length;
sv_type = "INV";
}
// Deletion
// In case of a small insertion inside of a deletion, the insert_size is a small positive value.
else if (insert_size <= args.max_tol_inserted_length)
{
record.alt() = {"<DEL>"};
// Increment end by 1 because VCF is 1-based
// Decrement end by 1 because deletion ends one base before mate2 begins
record.info().push_back({.id = "END", .value = mate2.position});
sv_length = -distance;
sv_length_iGenVar = sv_length;
sv_type = "DEL";
}
}
// Insertion (sv_length is positive)
// In case of a small deletion inside of an insertion, the distance is a small positive value
else if (insert_size > 0 && static_cast<uint64_t>(distance) <= args.max_tol_deleted_length)
{
record.alt() = {"<INS>"};
// Increment end by 1 because VCF is 1-based
// Decrement end by 1 because deletion ends one base before mate2 begins
record.info().push_back({.id = "END", .value = mate2.position});
sv_length = -distance;
record.info().push_back({.id = "END", .value = mate1.position + 1});
sv_length = insert_size;
sv_length_iGenVar = sv_length;
sv_type = "DEL";
sv_type = "INS";
}
// The SVLEN is neither shorter than the minimum nor longer than the maximum variant length specified by the user.
if (std::abs(sv_length) >= args.min_var_length &&
std::abs(sv_length) <= args.max_var_length)
{
record.info().push_back({.id = "SVLEN", .value = sv_length});
record.info().push_back({.id = "iGenVar_SVLEN", .value = sv_length_iGenVar});
record.info().push_back({.id = "SVTYPE", .value = sv_type});
found_SV = true;
}
}
// Insertion (sv_length is positive)
// In case of a small deletion inside of an insertion, the distance is a small positive value
else if (insert_size > 0 && static_cast<uint64_t>(distance) <= args.max_tol_deleted_length)
{
record.alt() = {"<INS>"};
// Increment end by 1 because VCF is 1-based
record.info().push_back({.id = "END", .value = mate1.position + 1});
sv_length = insert_size;
sv_length_iGenVar = sv_length;
sv_type = "INS";
}
// The SVLEN is neither too short nor too long than specified by the user.
if (std::abs(sv_length) >= args.min_var_length &&
std::abs(sv_length) <= args.max_var_length)
{
record.info().push_back({.id = "SVLEN", .value = sv_length});
record.info().push_back({.id = "iGenVar_SVLEN", .value = sv_length_iGenVar});
record.info().push_back({.id = "SVTYPE", .value = sv_type});
found_SV = true;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ rule run_iGenVar:
vcf = "results/caller_comparison_iGenVar_only/{input_combination}/variants.vcf"
log:
"logs/caller_comparison_iGenVar_only/{input_combination}_output.log"
threads: 2
threads: 4
run:
if wildcards.input_combination == 'S1': # Illumina Paired End
short_bam = config["short_read_bam"]["s1"]
Expand Down
8 changes: 4 additions & 4 deletions test/data/datasources.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@ declare_datasource (FILE single_end_mini_example.sam
# copies file to <build>/data/output_err.txt
declare_datasource (FILE output_err.txt
URL ${CMAKE_SOURCE_DIR}/test/data/mini_example/output_err.txt
URL_HASH SHA256=587d7d853a713cc6cb1af211a95190e6bd50b261160f7222d318c0841e9f16ff)
URL_HASH SHA256=f44b6522c6df97506a70e6cc961c4d2b1caf2cef3b245688d327ce22c7425133)

# copies file to <build>/data/output_res.vcf
declare_datasource (FILE output_res.vcf
URL ${CMAKE_SOURCE_DIR}/test/data/mini_example/output_res.vcf
URL_HASH SHA256=7dd5c100edaecf8227e92825aeb49649a8b1c3f459d047d67687b36b6f7cdaa9)
URL_HASH SHA256=9b70052748679b2e29d4985ad9864380c6f7f66f56c6797f73421dd2598a1455)

# copies file to <build>/data/output_short_and_long_err.txt
declare_datasource (FILE output_short_and_long_err.txt
URL ${CMAKE_SOURCE_DIR}/test/data/mini_example/output_short_and_long_err.txt
URL_HASH SHA256=a4aa69df5a9adfd6573e33686fb2c434cea21775420869c08070e458a5786714)
URL_HASH SHA256=568ed8a913fc05f31f91051df6ef17be3b55c0afd79c5f6c657ad6342f897fd2)

# copies file to <build>/data/output_short_and_long_res.vcf
declare_datasource (FILE output_short_and_long_res.vcf
URL ${CMAKE_SOURCE_DIR}/test/data/mini_example/output_short_and_long_res.vcf
URL_HASH SHA256=6ca496cf1c10699cf93c2c32dad2005d3ee5a4c9fd1a391c7eed81abd0b74376)
URL_HASH SHA256=71b92fb734be1fa5b4d5973207920c17e8a2d02d9351a413fb401401eb500ad4)
2 changes: 1 addition & 1 deletion test/data/mini_example/output_err.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,4 @@ Inverted bases: TAGCAACTCTCCAAAAC
Start clustering...
Done with clustering. Found 21 junction clusters.
No refinement was selected.
Detected 17 SVs.
Detected 14 SVs.
3 changes: 0 additions & 3 deletions test/data/mini_example/output_res.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,3 @@ chr1 368 . N <INS> 5 PASS END=368;SVLEN=11;iGenVar_SVLEN=11;SVTYPE=INS GT ./.
chr1 384 . N <DEL> 1 PASS END=395;SVLEN=-11;iGenVar_SVLEN=-11;SVTYPE=DEL GT ./.
chr1 509 . N <DUP:TANDEM> 1 PASS END=529;SVLEN=21;iGenVar_SVLEN=9;SVTYPE=DUP GT ./.
chr1 581 . N <INV> 5 PASS END=598;SVLEN=17;iGenVar_SVLEN=17;SVTYPE=INV GT ./.
chr1 267 . N <DEL> 3 PASS END=285;SVLEN=-18;iGenVar_SVLEN=-18;SVTYPE=DEL GT ./.
chr1 283 . N <DEL> 3 PASS END=298;SVLEN=-15;iGenVar_SVLEN=-15;SVTYPE=DEL GT ./.
chr1 509 . N <DEL> 1 PASS END=528;SVLEN=-19;iGenVar_SVLEN=-19;SVTYPE=DEL GT ./.
2 changes: 1 addition & 1 deletion test/data/mini_example/output_short_and_long_err.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,4 @@ Inverted bases: TAGCAACTCTCCAAAAC
Start clustering...
Done with clustering. Found 21 junction clusters.
No refinement was selected.
Detected 17 SVs.
Detected 14 SVs.
3 changes: 0 additions & 3 deletions test/data/mini_example/output_short_and_long_res.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,3 @@ chr1 368 . N <INS> 5 PASS END=368;SVLEN=11;iGenVar_SVLEN=11;SVTYPE=INS GT ./.
chr1 384 . N <DEL> 1 PASS END=395;SVLEN=-11;iGenVar_SVLEN=-11;SVTYPE=DEL GT ./.
chr1 509 . N <DUP:TANDEM> 1 PASS END=529;SVLEN=21;iGenVar_SVLEN=9;SVTYPE=DUP GT ./.
chr1 581 . N <INV> 5 PASS END=598;SVLEN=17;iGenVar_SVLEN=17;SVTYPE=INV GT ./.
chr1 267 . N <DEL> 3 PASS END=285;SVLEN=-18;iGenVar_SVLEN=-18;SVTYPE=DEL GT ./.
chr1 283 . N <DEL> 3 PASS END=298;SVLEN=-15;iGenVar_SVLEN=-15;SVTYPE=DEL GT ./.
chr1 509 . N <DEL> 1 PASS END=528;SVLEN=-19;iGenVar_SVLEN=-19;SVTYPE=DEL GT ./.

0 comments on commit 344ccb4

Please sign in to comment.