-
Notifications
You must be signed in to change notification settings - Fork 69
/
Copy pathSingleIndividualSample.java
109 lines (89 loc) · 3.01 KB
/
SingleIndividualSample.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
package phylonet.coalescent;
import java.util.ArrayList;
import java.util.List;
import phylonet.tree.model.MutableTree;
import phylonet.tree.model.Tree;
import phylonet.tree.model.sti.STITree;
import phylonet.util.BitSet;
/**
 * This class keeps track of a single-individual sample
 * of a multi-individual dataset. For each species,
 * we will include only one of its individuals in any instance of this class.
 *
 * <p>The individual kept for each species is drawn with
 * {@code GlobalMaps.random} when the instance is constructed.
 *
 * @author smirarab
 */
public class SingleIndividualSample {

    /**
     * IDs of sampled individuals in the original (global) taxon identifier.
     * Entry {@code s} is the individual drawn for species index {@code s}.
     */
    private final List<Integer> sampleGlobalIDs;

    /**
     * Names of sampled individuals, in the same order as
     * {@link #sampleGlobalIDs}.
     */
    private final List<String> sampleNames;

    // TODO: maybe we should take the distance matrix out of this class.
    // not sure why it's here.
    /**
     * The matrix returned by {@code getInducedMatrix(sampleGlobalIDs)} on the
     * matrix handed to the constructor (presumably the similarity matrix
     * restricted to the sampled individuals; confirm against SimilarityMatrix).
     */
    private final SimilarityMatrix similarityMatrix;

    /**
     * Size of the sample. Initialised to the number of sampled individuals;
     * can be overwritten through {@link #setSampleSize(int)}.
     */
    private int sampleSize;

    /**
     * Draws one individual per species and stores its global ID and name.
     *
     * @param spm    species mapper; queried for the species count and for the
     *               list of taxa belonging to each species index
     * @param matrix matrix from which the sample-specific matrix is derived
     *               via {@code getInducedMatrix}
     */
    public SingleIndividualSample(SpeciesMapper spm, SimilarityMatrix matrix) {
        sampleGlobalIDs = new ArrayList<Integer>();
        sampleNames = new ArrayList<String>();
        /*
         * TODO: check if other parts of code need any changes
         */
        for (int s = 0; s < spm.getSpeciesCount(); s++) {
            List<Integer> stTaxa = spm.getTaxaForSpecies(s);
            // Pick one individual of this species at random.
            int tid = stTaxa.get(GlobalMaps.random.nextInt(stTaxa.size()));
            sampleGlobalIDs.add(tid);
            sampleNames.add(GlobalMaps.taxonIdentifier.getTaxonName(tid));
        }
        // Assign the field directly: setSampleSize is public and overridable,
        // and must not be invoked while the object is still being constructed.
        this.sampleSize = sampleGlobalIDs.size();
        this.similarityMatrix = matrix.getInducedMatrix(this.sampleGlobalIDs);
    }

    /**
     * Copies every input tree and constrains each copy by the sampled leaf
     * names. Unlike {@link #contractTree(Tree)}, no gene-tree to species-tree
     * relabelling ({@code gtToSt2}) is applied here.
     *
     * @param intrees trees to contract; the inputs themselves are copied,
     *                not passed to {@code constrainByLeaves}
     * @return a new list holding one constrained copy per input tree, in
     *         iteration order
     */
    public List<Tree> contractTrees(Iterable<Tree> intrees) {
        List<Tree> outtrees = new ArrayList<Tree>();
        for (Tree tr : intrees) {
            STITree ntr = new STITree(tr);
            ntr.constrainByLeaves(sampleNames);
            outtrees.add(ntr);
        }
        return outtrees;
    }

    /**
     * Copies the input tree, constrains the copy by the sampled leaf names and
     * then passes it through the species mapper's {@code gtToSt2}.
     *
     * @param intree tree to contract; a copy is made before constraining
     * @return the constrained copy after {@code gtToSt2} has been applied
     */
    public Tree contractTree(Tree intree) {
        STITree ntr = new STITree(intree);
        // sampleNames come from GlobalMaps.taxonIdentifier.getTaxonName,
        // i.e. they are gene tree names.
        ntr.constrainByLeaves(sampleNames);
        // NOTE(review): presumably relabels gene-tree leaves with species
        // names in place; confirm against SpeciesMapper.gtToSt2.
        GlobalMaps.taxonNameMap.getSpeciesIdMapper().gtToSt2((MutableTree) ntr);
        return ntr;
    }

    /**
     * @return the matrix induced on the sampled individuals, as computed in
     *         the constructor
     */
    public SimilarityMatrix getSimilarityMatrix() {
        return this.similarityMatrix;
    }

    /**
     * @return the current sample size
     */
    public int getSampleSize() {
        return sampleSize;
    }

    /**
     * Overwrites the stored sample size. The lists of sampled IDs and names
     * are not modified by this call.
     *
     * @param sampleSize the new sample size
     */
    public void setSampleSize(int sampleSize) {
        this.sampleSize = sampleSize;
    }

    /**
     * Translates a bit set indexed by position in this sample into a bit set
     * indexed by global taxon ID.
     *
     * @param bs bit set whose set bits are indices into the sample; each set
     *           index must be smaller than the number of sampled individuals,
     *           otherwise the list lookup fails
     * @return a new bit set, created with the global taxon count, in which the
     *         global ID of every selected sample member is set
     */
    BitSet toOriginalBitSet(BitSet bs) {
        BitSet ret = new BitSet(GlobalMaps.taxonIdentifier.taxonCount());
        for (int j = bs.nextSetBit(0); j >= 0; j = bs.nextSetBit(j + 1)) {
            ret.set(this.sampleGlobalIDs.get(j));
        }
        return ret;
    }
}