Skip to content

Commit f676392

Browse files
Merge pull request #293 from neherlab/fix/parsimony-reconstruction
fix: reconstruction of ambiguous nucs in ancestral parsimony mode
2 parents dd992e4 + d7eeb0f commit f676392

File tree

3 files changed

+33
-30
lines changed

3 files changed

+33
-30
lines changed

packages/treetime/src/alphabet/alphabet.rs

+31-27
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ pub enum AlphabetName {
2929
}
3030

3131
pub type ProfileMap = IndexMap<char, Array1<f64>>;
32+
pub type StateSetMap = IndexMap<char, StateSet>;
33+
pub type CharToSet = IndexMap<char, StateSet>;
34+
pub type SetToChar = IndexMap<StateSet, char>;
3235

3336
#[derive(Clone, Debug, Serialize, Deserialize)]
3437
pub struct Alphabet {
@@ -43,6 +46,11 @@ pub struct Alphabet {
4346
treat_gap_as_unknown: bool,
4447
profile_map: ProfileMap,
4548

49+
#[serde(skip)]
50+
char_to_set: IndexMap<char, StateSet>,
51+
#[serde(skip)]
52+
set_to_char: IndexMap<StateSet, char>,
53+
4654
#[serde(skip)]
4755
char_to_index: Vec<Option<usize>>,
4856
#[serde(skip)]
@@ -124,6 +132,18 @@ impl Alphabet {
124132
index_to_char.push(c);
125133
}
126134

135+
let char_to_set = {
136+
let mut char_to_set: CharToSet = canonical.iter().map(|c| (c, StateSet::from_char(c))).collect();
137+
ambiguous.iter().for_each(|(key, chars)| {
138+
char_to_set.insert(*key, StateSet::from_iter(chars));
139+
});
140+
char_to_set.insert(*gap, StateSet::from_char(*gap));
141+
char_to_set.insert(*unknown, StateSet::from_char(*unknown));
142+
char_to_set
143+
};
144+
145+
let set_to_char: SetToChar = char_to_set.iter().map(|(&c, &s)| (s, c)).collect();
146+
127147
Ok(Self {
128148
all,
129149
char_to_index,
@@ -137,25 +157,11 @@ impl Alphabet {
137157
gap: *gap,
138158
treat_gap_as_unknown: *treat_gap_as_unknown,
139159
profile_map,
160+
char_to_set,
161+
set_to_char,
140162
})
141163
}
142164

143-
/// Resolve possible ambiguity of the given character to the set of canonical chars
144-
pub fn disambiguate(&self, c: char) -> StateSet {
145-
// If unknown then could be any canonical (e.g. N => { A, C, G, T })
146-
if self.is_unknown(c) {
147-
self.canonical().collect()
148-
}
149-
// If ambiguous (e.g. R => { A, G })
150-
else if let Some(resolutions) = self.ambiguous.get(&c) {
151-
resolutions.iter().copied().collect()
152-
}
153-
// Otherwise it's not ambiguous and it's the char itself (incl. gap)
154-
else {
155-
once(c).collect()
156-
}
157-
}
158-
159165
#[inline]
160166
pub fn get_profile(&self, c: char) -> &Array1<f64> {
161167
self
@@ -178,7 +184,7 @@ impl Alphabet {
178184
{
179185
let mut profile = Array1::<f64>::zeros(self.n_canonical());
180186
for c in chars {
181-
let chars = self.disambiguate(*c.borrow());
187+
let chars = self.char_to_set(*c.borrow());
182188
for c in chars.iter() {
183189
let index = self.index(c);
184190
profile[index] = 1.0;
@@ -206,6 +212,14 @@ impl Alphabet {
206212
Ok(prof)
207213
}
208214

215+
pub fn set_to_char(&self, c: StateSet) -> char {
216+
self.set_to_char[&c]
217+
}
218+
219+
pub fn char_to_set(&self, c: char) -> StateSet {
220+
self.char_to_set[&c]
221+
}
222+
209223
/// All existing characters (including 'unknown' and 'gap')
210224
pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
211225
self.all.iter()
@@ -466,16 +480,6 @@ mod tests {
466480
use indoc::indoc;
467481
use pretty_assertions::assert_eq;
468482

469-
#[test]
470-
fn test_disambiguate() -> Result<(), Report> {
471-
let alphabet = Alphabet::new(AlphabetName::Nuc, false)?;
472-
assert_eq!(stateset! {'A', 'G'}, alphabet.disambiguate('R'));
473-
assert_eq!(stateset! {'A', 'C', 'G', 'T'}, alphabet.disambiguate('N'));
474-
assert_eq!(stateset! {'C'}, alphabet.disambiguate('C'));
475-
assert_eq!(stateset! {alphabet.gap()}, alphabet.disambiguate(alphabet.gap()));
476-
Ok(())
477-
}
478-
479483
#[test]
480484
fn test_alphabet_nuc() -> Result<(), Report> {
481485
let alphabet = Alphabet::new(AlphabetName::Nuc, false)?;

packages/treetime/src/commands/ancestral/fitch.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -509,8 +509,7 @@ pub fn ancestral_reconstruction_fitch(
509509
}
510510

511511
for (&pos, &states) in &node.fitch.variable {
512-
seq[pos] = states.first().unwrap();
513-
// seq[pos] = alphabet.ambiguate(states).first().unwrap();
512+
seq[pos] = alphabet.set_to_char(states);
514513
}
515514

516515
node.sequence = seq.clone();

packages/treetime/src/representation/graph_sparse.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ impl SparseSeqNode {
9494
.iter()
9595
.enumerate()
9696
.filter(|(_, &c)| alphabet.is_ambiguous(c))
97-
.map(|(pos, &c)| (pos, alphabet.disambiguate(c)))
97+
.map(|(pos, &c)| (pos, alphabet.char_to_set(c)))
9898
.collect();
9999

100100
let seq_dis = ParsimonySeqDis {

0 commit comments

Comments
 (0)