@@ -29,6 +29,9 @@ pub enum AlphabetName {
29
29
}
30
30
31
31
pub type ProfileMap = IndexMap < char , Array1 < f64 > > ;
32
+ pub type StateSetMap = IndexMap < char , StateSet > ;
33
+ pub type CharToSet = IndexMap < char , StateSet > ;
34
+ pub type SetToChar = IndexMap < StateSet , char > ;
32
35
33
36
#[ derive( Clone , Debug , Serialize , Deserialize ) ]
34
37
pub struct Alphabet {
@@ -43,6 +46,11 @@ pub struct Alphabet {
43
46
treat_gap_as_unknown : bool ,
44
47
profile_map : ProfileMap ,
45
48
49
+ #[ serde( skip) ]
50
+ char_to_set : IndexMap < char , StateSet > ,
51
+ #[ serde( skip) ]
52
+ set_to_char : IndexMap < StateSet , char > ,
53
+
46
54
#[ serde( skip) ]
47
55
char_to_index : Vec < Option < usize > > ,
48
56
#[ serde( skip) ]
@@ -124,6 +132,18 @@ impl Alphabet {
124
132
index_to_char. push ( c) ;
125
133
}
126
134
135
+ let char_to_set = {
136
+ let mut char_to_set: CharToSet = canonical. iter ( ) . map ( |c| ( c, StateSet :: from_char ( c) ) ) . collect ( ) ;
137
+ ambiguous. iter ( ) . for_each ( |( key, chars) | {
138
+ char_to_set. insert ( * key, StateSet :: from_iter ( chars) ) ;
139
+ } ) ;
140
+ char_to_set. insert ( * gap, StateSet :: from_char ( * gap) ) ;
141
+ char_to_set. insert ( * unknown, StateSet :: from_char ( * unknown) ) ;
142
+ char_to_set
143
+ } ;
144
+
145
+ let set_to_char: SetToChar = char_to_set. iter ( ) . map ( |( & c, & s) | ( s, c) ) . collect ( ) ;
146
+
127
147
Ok ( Self {
128
148
all,
129
149
char_to_index,
@@ -137,25 +157,11 @@ impl Alphabet {
137
157
gap : * gap,
138
158
treat_gap_as_unknown : * treat_gap_as_unknown,
139
159
profile_map,
160
+ char_to_set,
161
+ set_to_char,
140
162
} )
141
163
}
142
164
143
- /// Resolve possible ambiguity of the given character to the set of canonical chars
144
- pub fn disambiguate ( & self , c : char ) -> StateSet {
145
- // If unknown then could be any canonical (e.g. N => { A, C, G, T })
146
- if self . is_unknown ( c) {
147
- self . canonical ( ) . collect ( )
148
- }
149
- // If ambiguous (e.g. R => { A, G })
150
- else if let Some ( resolutions) = self . ambiguous . get ( & c) {
151
- resolutions. iter ( ) . copied ( ) . collect ( )
152
- }
153
- // Otherwise it's not ambiguous and it's the char itself (incl. gap)
154
- else {
155
- once ( c) . collect ( )
156
- }
157
- }
158
-
159
165
#[ inline]
160
166
pub fn get_profile ( & self , c : char ) -> & Array1 < f64 > {
161
167
self
@@ -178,7 +184,7 @@ impl Alphabet {
178
184
{
179
185
let mut profile = Array1 :: < f64 > :: zeros ( self . n_canonical ( ) ) ;
180
186
for c in chars {
181
- let chars = self . disambiguate ( * c. borrow ( ) ) ;
187
+ let chars = self . char_to_set ( * c. borrow ( ) ) ;
182
188
for c in chars. iter ( ) {
183
189
let index = self . index ( c) ;
184
190
profile[ index] = 1.0 ;
@@ -206,6 +212,14 @@ impl Alphabet {
206
212
Ok ( prof)
207
213
}
208
214
215
+ pub fn set_to_char ( & self , c : StateSet ) -> char {
216
+ self . set_to_char [ & c]
217
+ }
218
+
219
+ pub fn char_to_set ( & self , c : char ) -> StateSet {
220
+ self . char_to_set [ & c]
221
+ }
222
+
209
223
/// All existing characters (including 'unknown' and 'gap')
210
224
pub fn chars ( & self ) -> impl Iterator < Item = char > + ' _ {
211
225
self . all . iter ( )
@@ -466,16 +480,6 @@ mod tests {
466
480
use indoc:: indoc;
467
481
use pretty_assertions:: assert_eq;
468
482
469
- #[ test]
470
- fn test_disambiguate ( ) -> Result < ( ) , Report > {
471
- let alphabet = Alphabet :: new ( AlphabetName :: Nuc , false ) ?;
472
- assert_eq ! ( stateset! { 'A' , 'G' } , alphabet. disambiguate( 'R' ) ) ;
473
- assert_eq ! ( stateset! { 'A' , 'C' , 'G' , 'T' } , alphabet. disambiguate( 'N' ) ) ;
474
- assert_eq ! ( stateset! { 'C' } , alphabet. disambiguate( 'C' ) ) ;
475
- assert_eq ! ( stateset! { alphabet. gap( ) } , alphabet. disambiguate( alphabet. gap( ) ) ) ;
476
- Ok ( ( ) )
477
- }
478
-
479
483
#[ test]
480
484
fn test_alphabet_nuc ( ) -> Result < ( ) , Report > {
481
485
let alphabet = Alphabet :: new ( AlphabetName :: Nuc , false ) ?;
0 commit comments