Skip to content

Commit

Permalink
Complete lazy computation for PSMFile class, speeds up parsing by avo…
Browse files Browse the repository at this point in the history
…iding unnecessary string splitting
  • Loading branch information
danielgeiszler committed Apr 13, 2024
1 parent 8d7c324 commit 85cee56
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 10 deletions.
66 changes: 61 additions & 5 deletions src/edu/umich/andykong/ptmshepherd/PSMFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ public class PSM {

PSM(int lineNum, String line) {
this.lineNum = lineNum;
this.spLine = new ArrayList<>(StringParsingUtils.splitStringByTab(line.replace("\n","")));
this.spLine = new ArrayList<>(StringParsingUtils.splitStringByTab(
line.replace("\n","")));
this.fileName = null;
this.spec = null;
this.specNum = -1;
Expand All @@ -81,29 +82,58 @@ public class PSM {
this.dMass = null;
}

/**
 * Creates a PSM that remembers only its row index; nothing is parsed here.
 * The split line and every derived value start out unset and are filled in
 * by the accessors the first time they are requested.
 *
 * @param lineNum index of this PSM's row, later used to look the raw line up
 */
PSM(int lineNum) {
    this.lineNum = lineNum;

    // -1 is the "not yet computed" marker that getSpecNum() tests for.
    this.specNum = -1;

    // The raw line has not been split yet.
    this.spLine = null;

    // Cached values derived from the split line; null means "not computed".
    this.spec = null;
    this.fileName = null;
    this.pep = null;
    this.dMass = null;
    this.mods = null;
    this.modArr = null;
}

/**
 * Materialises {@code spLine} for this PSM: looks the raw row up in
 * {@code data} by {@code lineNum}, removes any newline characters and hands
 * the result to {@code StringParsingUtils.splitStringByTab}, copying the
 * returned values into a fresh {@link ArrayList}.
 */
private void calculateSpLine() {
    String rawLine = data.get(this.lineNum);
    String withoutNewlines = rawLine.replace("\n", "");
    this.spLine = new ArrayList<>(StringParsingUtils.splitStringByTab(withoutNewlines));
}

/**
 * Inserts a value into this PSM's split line at the given position.
 *
 * @param colIdx index at which {@code val} is inserted
 * @param val    value to insert
 */
public void addValAtColumn(int colIdx, String val) {
    // Split the raw line on first use.
    if (this.spLine == null) {
        this.calculateSpLine();
    }
    this.spLine.add(colIdx, val);
}

/**
 * Inserts a value into this PSM's split line at the position that
 * {@code getColumn(colName)} reports for the named column.
 *
 * @param colName header name used to look the insertion index up
 * @param val     value to insert
 */
public void addValAtColumn(String colName, String val) {
    // Split the raw line on first use (done before the column lookup).
    if (this.spLine == null) {
        this.calculateSpLine();
    }
    int targetIdx = getColumn(colName);
    this.spLine.add(targetIdx, val);
}

/**
 * Overwrites the value stored at the given position of this PSM's split line.
 *
 * @param colIdx index of the entry to overwrite
 * @param val    replacement value
 */
public void replaceValAtColumn(int colIdx, String val) {
    // Split the raw line on first use.
    if (this.spLine == null) {
        this.calculateSpLine();
    }
    this.spLine.set(colIdx, val);
}

/**
 * Overwrites the value of the named column in this PSM's split line, using
 * {@code getColumn(colName)} to resolve the position.
 *
 * @param colName header name used to look the target index up
 * @param val     replacement value
 */
public void replaceValAtColumn(String colName, String val) {
    // Split the raw line on first use (done before the column lookup).
    if (this.spLine == null) {
        this.calculateSpLine();
    }
    int targetIdx = getColumn(colName);
    this.spLine.set(targetIdx, val);
}

/**
 * Returns this PSM's spectrum name, computing and caching it on first use.
 * The value is the "Spectrum" column passed through {@code reNormName}.
 *
 * @return the cached spectrum name
 */
public String getSpec() {
    // Split the raw line on first use.
    if (this.spLine == null) {
        this.calculateSpLine();
    }
    // Already computed by an earlier call.
    if (this.spec != null) {
        return this.spec;
    }
    String rawSpectrum = spLine.get(getColumn("Spectrum"));
    this.spec = reNormName(rawSpectrum);
    return this.spec;
}

public int getSpecNum() {
if (this.spLine == null)
this.calculateSpLine();
if (this.specNum == -1) {
if (this.spec == null) {
this.getSpec();
Expand All @@ -115,18 +145,24 @@ public int getSpecNum() {
}

/**
 * Returns the portion of the spectrum name that precedes its first '.',
 * computing and caching it on first use.
 *
 * @return the cached file-name prefix of {@link #getSpec()}
 */
public String getFileName() {
    // Split the raw line on first use.
    if (this.spLine == null) {
        this.calculateSpLine();
    }
    if (this.fileName == null) {
        // getSpec() caches its result, so a single lookup is enough.
        String specName = this.getSpec();
        int firstDot = specName.indexOf(".");
        this.fileName = specName.substring(0, firstDot);
    }
    return this.fileName;
}

/**
 * Returns the value of the "Peptide" column, reading it from the split line
 * the first time and serving the cached copy afterwards.
 *
 * @return the cached peptide string
 */
public String getPep() {
    // Split the raw line on first use.
    if (this.spLine == null) {
        this.calculateSpLine();
    }
    // Already computed by an earlier call.
    if (this.pep != null) {
        return this.pep;
    }
    int peptideCol = getColumn("Peptide");
    this.pep = spLine.get(peptideCol);
    return this.pep;
}

public ArrayList<ImmutablePair<Integer,Float>> getMods() {
if (this.spLine == null)
this.calculateSpLine();
if (this.mods == null) {
this.mods = new ArrayList<>();
String strMods = spLine.get(getColumn("Assigned Modifications"));
Expand All @@ -152,6 +188,8 @@ else if (spos.equals("c"))
}

public float [] getModsAsArray() {
if (this.spLine == null)
this.calculateSpLine();
if (this.modArr == null) {
this.modArr = new float[this.getPep().length()];
Arrays.fill(this.modArr, 0.0f);
Expand All @@ -166,15 +204,25 @@ else if (spos.equals("c"))
}

/**
 * Returns the "Delta Mass" column parsed as a float. The parsed value is
 * cached, so the string is only converted on the first call.
 *
 * @return the delta mass of this PSM
 */
public float getDMass() {
    // Split the raw line on first use.
    if (this.spLine == null) {
        this.calculateSpLine();
    }
    if (this.dMass == null) {
        String rawDeltaMass = this.spLine.get(getColumn("Delta Mass"));
        this.dMass = Float.parseFloat(rawDeltaMass);
    }
    return this.dMass;
}

/**
 * Returns the raw string stored in the named column of this PSM.
 *
 * @param colName header name, resolved to an index via {@code getColumn}
 * @return the value held at that index of the split line
 */
public String getColumnValue(String colName) {
    // Split the raw line on first use (done before the column lookup).
    if (this.spLine == null) {
        this.calculateSpLine();
    }
    int colIdx = getColumn(colName);
    return this.spLine.get(colIdx);
}

/**
 * Returns this PSM's split line, creating it first if it has not been
 * computed yet. The internal list itself is returned, not a copy.
 *
 * @return the list of column values backing this PSM
 */
public ArrayList<String> getSpLine() {
    boolean notYetSplit = (this.spLine == null);
    if (notYetSplit) {
        this.calculateSpLine();
    }
    return this.spLine;
}

/**
 * Renders this PSM as a single tab-delimited row built from its split line.
 *
 * @return the column values joined with tab characters
 */
@Override
public String toString() {
    // A PSM created through the lazy PSM(int) constructor has a null spLine
    // until some accessor splits the raw line; without this guard String.join
    // would throw a NullPointerException for rows that were never accessed.
    if (this.spLine == null) {
        this.calculateSpLine();
    }
    return String.join("\t", this.spLine);
Expand Down Expand Up @@ -223,7 +271,7 @@ public static String getCRC32(File f) throws Exception {
* @return PSM
*/
private PSM getRawLine(int i) {
    // Build the PSM lazily from its line index only. The raw line is fetched
    // and split by the PSM itself on first access, so no string splitting
    // happens here. The earlier eager form, new PSM(i, data.get(i)), declared
    // the same local a second time and is superseded by this one.
    return new PSM(i);
}

Expand Down Expand Up @@ -809,15 +857,19 @@ public PSMFile(File f) throws Exception {
}
in.close();

// Build index of PSM file lines to spectrum names and pre parse spec names
this.scanToLine = new HashMap<>();
// Prefill PSM array with dummy variables so they can be constructed upon being called
for (int i = 0; i < this.data.size(); i++) {
PSM tPSM = this.getRawLine(i);
psms.add(tPSM);
this.scanToLine.put(tPSM.getSpec(), i);
}
}

/**
 * (Re)builds {@code scanToLine}, mapping each PSM's spectrum name (as
 * returned by {@code getSpec()}) to its index in {@code psms}. When two PSMs
 * report the same spectrum name, the later index replaces the earlier one.
 */
private void constructScanToLineMap() {
    this.scanToLine = new HashMap<>();
    int lineIdx = 0;
    for (PSM psm : this.psms) {
        this.scanToLine.put(psm.getSpec(), lineIdx);
        lineIdx++;
    }
}

public void annotateMassDiffs(String [] annotations) throws IOException {
/* find column to modify, overwrite Observed Modifications col if exists */
int annoCol = getColumn("Observed Modifications");
Expand Down Expand Up @@ -936,6 +988,10 @@ public void addColumn(int colIndx, String newHead, ArrayList<String> keys, Array
"editing PSM table will fail\n");
}

// Check that the scan to line mapping has been constructed
if (this.scanToLine == null)
constructScanToLineMap();

//TODO if column not found and inserting to the right, it will insert at the beginning of the table
// This fixes it, but the error message should be different. Not sure how to check colIndx
if (colIndx == 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ public class IterativeLocalizer {
static int scanNum;

Map<String, LocalizationLikelihood> localizationLikelihoodMap;
Map<String, HashMap<String, ArrayList<Integer>>> psmToRunToLine;
Map<String, HashSet<Integer>> convergedPsmsMap;
static boolean debugFlag;
boolean printIonDistribution = true; // TODO make this a parameter
boolean poissonBinomialDistribution = true; // TODO make this a parameter
Expand Down Expand Up @@ -197,6 +199,12 @@ private void calculateLocalizationProbabilities() throws Exception {
// Set up likelihood store so that values can be accessed without recomputing
this.localizationLikelihoodMap = new HashMap<>();

// Set up converged scans map so that PSMs in converged bins are not reprocessed unnecessarily
this.convergedPsmsMap = new HashMap<>();

// Set up run -> line mapping so that the whole PSM table doesn't get parsed every iteration
this.psmToRunToLine = new HashMap<>();

// Faster access to zero bin spectra to be ignored
double zbL = this.peaks[1][this.zeroBin]; // TODO: set up custom bounds?
double zbR = this.peaks[2][this.zeroBin]; // TODO: set up custom bounds?
Expand All @@ -216,8 +224,18 @@ private void calculateLocalizationProbabilities() throws Exception {

// Loop through PSM files
for (int i = 0; i < dsData.size(); i++) {
PSMFile psmf = new PSMFile(new File(dsData.get(i)[0]));
HashMap<String, ArrayList<Integer>> runToLine = psmf.getRunMappings();
String psmfStr = dsData.get(i)[0];
PSMFile psmf = new PSMFile(psmfStr);

// Get run to line mappings, if first run calculate, else get preprocessed list to prevent extra parsing
HashMap<String, ArrayList<Integer>> runToLine;
if (epoch == 1) {//todo simplify logic
runToLine = psmf.getRunMappings(); //TODO this is what's parsing it every time
this.psmToRunToLine.put(psmfStr, runToLine);
} else
runToLine = this.psmToRunToLine.get(psmfStr);

// These hold the output to insert into the PSM table //todo only need to be declared on final run
ArrayList<String> specNames = new ArrayList<>();
ArrayList<String> strOutputProbs = new ArrayList<>();
ArrayList<String> strMaxProbs = new ArrayList<>();
Expand All @@ -237,6 +255,15 @@ private void calculateLocalizationProbabilities() throws Exception {
this.scanNum = -1;
debugFlag = false;

// First, check if we can skip this line because the bin is converged
if (!finalPass) {
if (this.convergedPsmsMap.containsKey(cf)) { // check if run is in map
if (this.convergedPsmsMap.get(cf).contains(j)) { // check if scan is in converged bin
continue;
}
}
}

PSMFile.PSM psm = psmf.getLine(j);
float dMass = psm.getDMass();
String pep = psm.getPep();
Expand Down Expand Up @@ -268,9 +295,18 @@ private void calculateLocalizationProbabilities() throws Exception {
continue;
}

// If converged skip unless final round
if (!finalPass && this.priorProbs[cBin].getIsConverged()) // Safe because left is first
// If converged, skip. Unless final round, then calculate final values.
// If converged and not in converged map, add to converged map.
if (!finalPass && this.priorProbs[cBin].getIsConverged()) { // safe because left is first

if (!finalPass) {
if (!this.convergedPsmsMap.containsKey(cf)) // check if run is in map
this.convergedPsmsMap.put(cf, new HashSet<>(runToLine.get(cf).size()+1,1.0f));
if (!this.convergedPsmsMap.get(cf).contains(j)) // check if scan is in converged bin
this.convergedPsmsMap.get(cf).add(j); // add scan to list of converged scans
}
continue;
}

// todo this logic is getting way too complex, need to handle execution states in a static context
Spectrum spec = null;
Expand Down Expand Up @@ -1139,5 +1175,4 @@ private double safeDivide(int x, int y) {
else
return (double) x / (double) y;
}

}

0 comments on commit 85cee56

Please sign in to comment.