diff --git a/lib/App/Sandy/Error.pm b/lib/App/Sandy/Error.pm new file mode 100644 index 00000000..9b72b7f3 --- /dev/null +++ b/lib/App/Sandy/Error.pm @@ -0,0 +1,32 @@ +package App::Sandy::Error; +# ABSTRACT: Base class for error models + +use App::Sandy::Base 'class'; + +# VERSION + +has '_not_base' => ( + is => 'ro', + isa => 'HashRef', + builder => '_build_not_base', + lazy_build => 1 +); + +sub _build_not_base { + my %not_base = ( + A => ['T', 'C', 'G'], + a => ['t', 'c', 'g'], + T => ['A', 'C', 'G'], + t => ['a', 'c', 'g'], + C => ['A', 'T', 'G'], + c => ['a', 't', 'g'], + G => ['A', 'T', 'C'], + g => ['a', 't', 'c'] + ); + return \%not_base; +} + +sub randb { + my ($self, $base, $rng) = @_; + return $self->_not_base->{$base}[$rng->get_n(3)] || $base; +} diff --git a/lib/App/Sandy/Error/Phred.pm b/lib/App/Sandy/Error/Phred.pm new file mode 100644 index 00000000..61285c0d --- /dev/null +++ b/lib/App/Sandy/Error/Phred.pm @@ -0,0 +1,37 @@ +package App::Sandy::Error::Phred; +# ABSTRACT: Calculate error based on the phred score + +use App::Sandy::Base 'class'; + +use constant ASCII_INIT => 0x21; + +extends 'App::Sandy::Error'; + +# VERSION + +sub insert_sequencing_error { + my ($self, $read_ref, $quality_ref, $read_size, $rng) = @_; + my @errors; + + for (my $i = 0; $i < $read_size; $i++) { + my $char = substr $$quality_ref, $i, 1; + + if ($self->_is_there_any_error($char, $rng)) { + my $base = substr $$read_ref, $i, 1; + my $not_base = $self->randb($base, $rng); + + substr($$read_ref, $i, 1) = $not_base; + + push @errors => sprintf("%d:%s/%s", $i + 1, $base, $not_base); + } + } + + return \@errors; +} + +sub _is_there_any_error { + my ($self, $char, $rng) = @_; + my $score = ord($char) - ASCII_INIT; + my $prob = 10 ** (-$score / 10); + return int($rng->uniform() * (1 / $prob)) == 0; +} diff --git a/t/lib/TestsFor/App/Sandy/Error.pm b/t/lib/TestsFor/App/Sandy/Error.pm new file mode 100644 index 00000000..b88121fa --- /dev/null +++ b/t/lib/TestsFor/App/Sandy/Error.pm @@ -0,0 +1,45 @@ +package TestsFor::App::Sandy::Error; +# ABSTRACT: Tests for 'App::Sandy::Error' class + +use App::Sandy::Base 'test'; +use App::Sandy::RNG; + +use autodie; +use base 'TestsFor'; + +use constant SEED => 17; + +sub startup : Tests(startup) { + my $test = shift; + $test->SUPER::startup; + my $class = ref $test; + $class->mk_classdata('default_error'); + $class->mk_classdata('rng'); +} + +sub setup : Tests(setup) { + my $test = shift; + my %child_arg = @_; + $test->SUPER::setup; + + $test->default_error($test->class_to_test->new()); + $test->rng(App::Sandy::RNG->new(SEED)); +} + +sub not_base : Test(16) { + my $test = shift; + + my $class = $test->class_to_test; + my $error = $test->default_error; + my $rng = $test->rng; + + my @bases = (qw/A T C G a t c g/); + + for my $base (@bases) { + my $not_base = $error->randb($base, $rng); + isnt $base, $not_base, + "base '$base' should not be equal ~base '$not_base'"; + like $not_base, qr/[ATCGatcg]/, + "~base '$not_base' should be in [ATCGatcg]"; + } +} diff --git a/t/lib/TestsFor/App/Sandy/Error/Phred.pm b/t/lib/TestsFor/App/Sandy/Error/Phred.pm new file mode 100644 index 00000000..02dbac5f --- /dev/null +++ b/t/lib/TestsFor/App/Sandy/Error/Phred.pm @@ -0,0 +1,57 @@ +package TestsFor::App::Sandy::Error::Phred; +# ABSTRACT: Tests for 'App::Sandy::Error::Phred' class + +use App::Sandy::Base 'test'; +use App::Sandy::RNG; +use base 'TestsFor::App::Sandy::Error'; + +use autodie; +use base 'TestsFor'; + +sub startup : Tests(startup) { + my $test = shift; + $test->SUPER::startup; +} + +sub setup : Tests(setup) { + my $test = shift; + my %child_arg = @_; + $test->SUPER::setup; + + $test->default_error($test->class_to_test->new()); +} + +sub insert_error : Tests(3) { + my $test = shift; + + my $class = $test->class_to_test; + my $error = $test->default_error; + my $rng = $test->rng; + + my $read = 'ATCG' x 10; + my %dep = ( + '!' => { + min => 39, max => 41, + msg => sub{ "n_errors '$_[0]' should be between ]39,41[" } + }, + '$' => { + min => 14, max => 26, + msg => sub{ "n_errors '$_[0]' should be between ]14,26[" } + }, + '%' => { + min => 10, max => 23, + msg => sub{ "n_errors '$_[0]' should be between ]10,23[" } + } + ); + + for my $char (keys %dep) { + my $quality = $char x 40; + + my $errors_a = $error->insert_sequencing_error(\$read, + \$quality, 40, $rng); + my $n_errors = scalar @$errors_a; + + ok $n_errors < $dep{$char}{max} && $n_errors > $dep{$char}{min}, + $dep{$char}{msg}->($n_errors); + } +}