diff --git a/demo/test_genotyping.vcf b/demo/test_genotyping.vcf index 21e3a92..96c6d7e 100644 --- a/demo/test_genotyping.vcf +++ b/demo/test_genotyping.vcf @@ -1,5 +1,5 @@ ##fileformat=VCFv4.2 -##fileDate=20230810 +##fileDate=20230823 ##INFO= ##INFO= ##INFO= @@ -10,7 +10,7 @@ ##FORMAT= ##FORMAT= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT sample -chr1 15952 . T A . PASS AF=0.208333;UK=62;MA=0 GT:GQ:GL:KC 0/1:79:-70.0138,-4.629e-09,-7.972:2 -chr1 16509 . A G . PASS AF=0.416667;UK=62;MA=0 GT:GQ:GL:KC 0/1:10000:-34.6291,0,-78.37:3 -chr1 16636 . T TA,TAAA . PASS AF=0.375,0.416667;UK=50;MA=0 GT:GQ:GL:KC 1/2:19:-45.0636,-36.96,-35.35,-5.135,-0.004514,-1.986:1 -chr1 18262 . T C . PASS AF=0.916667;UK=62;MA=0 GT:GQ:GL:KC 1/1:10000:-127.447,-32.9,0:3 +chr1 15952 . T A . PASS AF=0.208333;UK=62;MA=0 GT:GQ:GL:KC 0/1:54:-49.4011,-1.686e-06,-5.411:2 +chr1 16509 . A G . PASS AF=0.416667;UK=62;MA=0 GT:GQ:GL:KC 0/1:10000:-29.5279,0,-49.28:3 +chr1 16636 . T TA,TAAA . PASS AF=0.375,0.416667;UK=50;MA=0 GT:GQ:GL:KC 1/2:32:-31.7599,-25.35,-23.76,-4.596,-0.0002714,-3.222:1 +chr1 18262 . T C . PASS AF=0.916667;UK=62;MA=0 GT:GQ:GL:KC 1/1:10000:-94.3729,-31.56,0:3 diff --git a/src/graphbuilder.cpp b/src/graphbuilder.cpp index 982a958..4a98210 100644 --- a/src/graphbuilder.cpp +++ b/src/graphbuilder.cpp @@ -131,8 +131,9 @@ void GraphBuilder::construct_graph(std::string filename, FastaReader* fasta_read current_start_pos -= 1; // if variant is contained in previous one, skip it if ((previous_chrom == current_chrom) && (current_start_pos < previous_end_pos)) { - cerr << "GraphBuilder: skip variant at " << current_chrom << ":" << current_start_pos << " since it is contained in a previous one." << endl; - continue; + stringstream err_msg; + err_msg << "GraphBuilder: variant at " << current_chrom << ":" << current_start_pos << " overlaps previous one. VCF does not represent a pangenome graph." << endl; + throw runtime_error(err_msg.str()); } // get REF allele diff --git a/src/pangenie-genotype.cpp b/src/pangenie-genotype.cpp index 8d777e5..e2bd80e 100644 --- a/src/pangenie-genotype.cpp +++ b/src/pangenie-genotype.cpp @@ -16,7 +16,7 @@ int main(int argc, char* argv[]) { cerr << endl; cerr << "program: PanGenie - genotyping based on kmer-counting and known haplotype sequences." << endl; cerr << "author: Jana Ebler" << endl << endl; - cerr << "version: v3.0.0" << endl; + cerr << "version: v3.0.1" << endl; string reffile = ""; string vcffile = ""; @@ -31,7 +31,7 @@ int main(int argc, char* argv[]) { bool only_genotyping = true; bool only_phasing = false; long double effective_N = 0.00001L; - long double regularization = 0.001L; + long double regularization = 0.01L; bool count_only_graph = true; bool ignore_imputed = false; size_t sampling_size = 0; diff --git a/src/pangenie-index.cpp b/src/pangenie-index.cpp index cf0f45d..9e3b1d8 100644 --- a/src/pangenie-index.cpp +++ b/src/pangenie-index.cpp @@ -15,7 +15,7 @@ int main(int argc, char* argv[]) { cerr << endl; cerr << "program: PanGenie - genotyping based on kmer-counting and known haplotype sequences." << endl; cerr << "author: Jana Ebler" << endl << endl; - cerr << "version: v3.0.0" << endl; + cerr << "version: v3.0.1" << endl; string reffile = ""; string vcffile = ""; diff --git a/tests/GraphBuilderTest.cpp b/tests/GraphBuilderTest.cpp index 13140ac..7a38ddb 100644 --- a/tests/GraphBuilderTest.cpp +++ b/tests/GraphBuilderTest.cpp @@ -283,9 +283,7 @@ TEST_CASE("GraphBuilder overlapping variants", "[GraphBuilder overlapping varian string fasta = "../tests/data/small1.fa"; map> graph; - GraphBuilder(vcf, fasta, graph, "../tests/data/empty-segments.fa", 10, false); - // should have skipped variant that is contained in another - REQUIRE(graph.at("chrA")->size() == 1); + REQUIRE_THROWS(GraphBuilder(vcf, fasta, graph, "../tests/data/empty-segments.fa", 10, false)); } TEST_CASE("GraphBuilder get_chromosomes", "[GraphBuilder get_chromosomes]") {