Skip to content

Commit

Permalink
Bump batmass to 1.32.4 for partial mzbin reading
Browse files Browse the repository at this point in the history
Add LocalizationLikelihood class for faster likelihood access
Add localizationLikelihoodMap in IterativeLocalizer for faster likelihood access
Refactored IterativeLocalizer.localizePsm function so that shared ions are only extracted from the spectrum during IterativeLocalizer.computePoissonBinomialLikelihood if the likelihood is not already computed
Add specNum attribute to PSM class
Bump to rc4

This commit results in a 45% speedup to IterativeLocalizer.calculateLocalizationProbabilities
  • Loading branch information
danielgeiszler committed Apr 11, 2024
1 parent 19521ff commit a910af3
Show file tree
Hide file tree
Showing 8 changed files with 140 additions and 15 deletions.
4 changes: 2 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ java {
targetCompatibility = JavaVersion.VERSION_1_9
}

version = '3.0.0-rc3'
version = '3.0.0-rc4'

application {
// Define the main class for the application
Expand Down Expand Up @@ -64,7 +64,7 @@ def slf4jVersion = '1.7.26'

dependencies {
implementation 'org.apache.commons:commons-math3:3.6.1'
implementation ('com.github.chhh:batmass-io:1.32.3') {
implementation ('com.github.chhh:batmass-io:1.32.4') {
exclude group: 'org.slf4j', module: 'slf4j-api'
}
// https://mvnrepository.com/artifact/org.apache.commons/commons-math3
Expand Down
Binary file not shown.
2 changes: 1 addition & 1 deletion lib/batmass-io-1.32.3.pom → lib/batmass-io-1.32.4.pom
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.github.chhh</groupId>
<artifactId>batmass-io</artifactId>
<version>1.32.3</version>
<version>1.32.4</version>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
Expand Down
15 changes: 14 additions & 1 deletion src/edu/umich/andykong/ptmshepherd/PSMFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public class PSM {
private int lineNum; // 0 indexed starting from header, 1 indexed starting from data
private ArrayList<String> spLine;
private String spec;
int specNum;
private int specNum;
private String pep;
private ArrayList<ImmutablePair<Integer, Float>> mods;
private float [] modArr;
Expand All @@ -70,6 +70,7 @@ public class PSM {
this.lineNum = lineNum;
this.spLine = new ArrayList<>(Arrays.asList(line.replace("\n","").split("\t", -1)));
this.spec = null;
this.specNum = -1;
this.pep = null;
this.mods = null;
this.modArr = null;
Expand Down Expand Up @@ -97,6 +98,18 @@ public String getSpec() {
this.spec = reNormName(spLine.get(getColumn("Spectrum")));
return this.spec;
}

/**
 * Returns the scan number encoded in this PSM's spectrum name, parsing it on first use.
 *
 * <p>The value is taken from the second-to-last dot-separated field of the spectrum name
 * (presumably names shaped like {@code run.scan.scan.charge} -- confirm against the input
 * format) and cached in {@code specNum}; {@code -1} marks "not parsed yet".
 *
 * @return the parsed scan number
 * @throws NumberFormatException if the second-to-last field is not an integer
 * @throws ArrayIndexOutOfBoundsException if the spectrum name has fewer than two fields
 */
public int getSpecNum() {
    // Already parsed on an earlier call: reuse the cached value
    if (this.specNum != -1) {
        return this.specNum;
    }

    // Make sure the spectrum name itself has been loaded before splitting it
    if (this.spec == null) {
        this.getSpec();
    }

    String[] nameFields = this.spec.split("\\.", -1);
    int scanFieldIdx = nameFields.length - 2;
    this.specNum = Integer.parseInt(nameFields[scanFieldIdx]);
    return this.specNum;
}

public String getPep() {
if (this.pep == null)
this.pep = spLine.get(getColumn("Peptide"));
Expand Down
2 changes: 1 addition & 1 deletion src/edu/umich/andykong/ptmshepherd/PTMShepherd.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
public class PTMShepherd {

public static final String name = "PTM-Shepherd";
public static final String version = "3.0.0-rc3";
public static final String version = "3.0.0-rc4";

static HashMap<String,String> params;
static TreeMap<String,ArrayList<String []>> datasets;
Expand Down
35 changes: 35 additions & 0 deletions src/edu/umich/andykong/ptmshepherd/core/MXMLReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ExecutorService;

import umich.ms.datatypes.LCMSDataSubset;
import umich.ms.datatypes.scan.IScan;
import umich.ms.datatypes.scancollection.impl.ScanCollectionDefault;
Expand Down Expand Up @@ -187,6 +189,28 @@ public void readFully(LCMSDataSource<?> source) throws Exception {
};
}

/**
 * Reads spectra from an mzBIN file, handing the requested scan numbers to
 * {@code MZBINFile.loadMZBINScans}, and rebuilds the by-name lookup maps from whatever
 * spectra {@code readAsPartialMzBIN} keeps.
 *
 * <p>Only files whose name ends in {@code .mzbin} or {@code .mzbin_cache} (compared
 * case-insensitively) are accepted. For any other file a message is printed and the JVM
 * exits with status 1.
 *
 * @param scanNums scan numbers passed to the mzBIN loader (presumably the only scans it
 *                 reads -- confirm against MZBINFile.loadMZBINScans)
 * @throws Exception if constructing or loading the mzBIN source fails
 */
public void readPartially(ArrayList<Integer> scanNums) throws Exception {
    // Locale.ROOT keeps the extension check independent of the JVM default locale:
    // under a Turkish locale "MZBIN".toLowerCase() produces a dotless i and would not match.
    String fn = f.toPath().getFileName().toString().toLowerCase(java.util.Locale.ROOT);

    boolean isMzBin = fn.endsWith(".mzbin") || fn.endsWith(".mzbin_cache");
    if (!isMzBin) {
        System.out.println("Cannot partially read non-mzBin file: " + f.getName());
        System.exit(1);
    }

    MZBINFile mzbinSource = new MZBINFile(this.threads, f, false);
    mzbinSource.loadMZBINScans(PTMShepherd.executorService, this.threads, scanNums);

    // Start from empty lookup maps so entries from an earlier read do not linger
    specsByName = new HashMap<>();
    specsByStrippedName = new HashMap<>();
    readAsPartialMzBIN(mzbinSource);
    for (Spectrum spectrum : specs) {
        specsByName.put(spectrum.scanName, spectrum);
        specsByStrippedName.put(stripChargeState(spectrum.scanName), spectrum);
    }
}

//400ngHeLaosmoothCE20-52lowguessSRIG450easy4_30tbl1_0NOexp12scansi_A1_01_3366.109793.109793.2
private void readAsMzBIN(MZBINFile mf) {
List<Spectrum> cspecs = new ArrayList<>();
Expand All @@ -199,6 +223,17 @@ private void readAsMzBIN(MZBINFile mf) {
cspecs.toArray(specs);
}

/**
 * Copies the MS2-level spectra held by an already-loaded mzBIN source into {@code specs}
 * and records how many were kept in {@code nSpecs}.
 *
 * @param mf mzBIN source whose {@code specs} have been loaded
 */
private void readAsPartialMzBIN(MZBINFile mf) {
    List<Spectrum> ms2Spectra = new ArrayList<>();
    for (MZBINSpectrum candidate : mf.specs) {
        // Anything that is not an MS2 scan is dropped
        if (candidate.msLevel != 2) {
            continue;
        }
        ms2Spectra.add(new Spectrum(candidate, mf.runName));
    }
    nSpecs = ms2Spectra.size();
    specs = ms2Spectra.toArray(new Spectrum[nSpecs]);
}

private void readAsFraggerMGF(MSFMGFFile mf) {
List<Spectrum> cspecs = new ArrayList<>();
for (Spectrum spectrum : mf.specs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,12 @@ public class IterativeLocalizer {

static String pep;
static int scanNum;

Map<String, LocalizationLikelihood> localizationLikelihoodMap;
static boolean debugFlag;
boolean printIonDistribution = true; // TODO make this a parameter
boolean poissonBinomialDistribution = true; // TODO make this a parameter
static int epoch;
int seed = 3341;
Random rng;

Expand Down Expand Up @@ -191,12 +194,15 @@ private void calculateLocalizationProbabilities() throws Exception {
// Set up missing spectra error handling
ArrayList<String> linesWithoutSpectra = new ArrayList<>(); //TODO

// Set up likelihood store so that values can be accessed without recomputing
this.localizationLikelihoodMap = new HashMap<>();

// Faster access to zero bin spectra to be ignored
double zbL = this.peaks[1][this.zeroBin]; // TODO: set up custom bounds?
double zbR = this.peaks[2][this.zeroBin]; // TODO: set up custom bounds?

long t1 = System.currentTimeMillis();
int epoch = 1;
epoch = 1;
int totalBins = this.peaks[0].length - 1;
int convergedBins = 0;
boolean finalPass = false;
Expand Down Expand Up @@ -278,7 +284,7 @@ private void calculateLocalizationProbabilities() throws Exception {
// Calculate site-specific localization probabilities
float[] mods = psm.getModsAsArray();
boolean[] allowedPoses = parseAllowedPositions(pep, this.allowedAAs, mods);
double[] siteProbs = localizePsm(spec, pep, mods, dMassApex, cBin, allowedPoses); // TODO check whether raw, theoretical, or peakapex is better
double[] siteProbs = localizePsm(psm, spec, pep, mods, dMassApex, cBin, allowedPoses); // TODO check whether raw, theoretical, or peakapex is better

// Update prior probabilities
if (!finalPass)
Expand All @@ -297,7 +303,7 @@ private void calculateLocalizationProbabilities() throws Exception {
Peptide decoyPep = Peptide.generateDecoy(pep, mods, this.rng, "mutated");
boolean[] decoyAllowedPoses = parseAllowedPositions(decoyPep.pepSeq,
this.allowedAAs, decoyPep.mods);
double[] decoySiteProbs = localizePsm(spec, decoyPep.pepSeq, decoyPep.mods, dMassApex,
double[] decoySiteProbs = localizePsm(psm, spec, decoyPep.pepSeq, decoyPep.mods, dMassApex,
cBin, decoyAllowedPoses);
double decoyMaxProb = findMaxLocalizationProbability(decoySiteProbs);
String decoyMaxProbAA = findMaxLocalizationProbabilitySite(decoySiteProbs, decoyPep.pepSeq);
Expand Down Expand Up @@ -328,6 +334,7 @@ private void calculateLocalizationProbabilities() throws Exception {
}
}
}

long t3 = System.currentTimeMillis();

// If analysis is complete and results have been written, exit loop
Expand Down Expand Up @@ -670,7 +677,7 @@ boolean isDecoyAA(char aa) {
* @param allowedPoses array of allowed positions based on peptide sequence localization restrictions TODO add mods
* @return double[] of localization probabilities
*/
private double[] localizePsm (Spectrum spec, String pep, float[] mods, float dMass, int cBin, boolean[] allowedPoses) {
private double[] localizePsm (PSMFile.PSM psm, Spectrum spec, String pep, float[] mods, float dMass, int cBin, boolean[] allowedPoses) {
double[] sitePriorProbs;
double[] siteLikelihoods = new double[pep.length()+2];
double marginalProb = 0.0;
Expand Down Expand Up @@ -737,7 +744,14 @@ private double[] localizePsm (Spectrum spec, String pep, float[] mods, float dMa
mods[i] -= dMass;
}
**/
siteLikelihoods = computePoissonBinomialLikelihood(pep, mods, dMass, allowedPoses, reducedMzs, reducedInts);

// Check to see if the likelihood has already been computed. If it has, grab it. If not, compute it.
if (this.localizationLikelihoodMap.containsKey(psm.getSpec())) {
siteLikelihoods = this.localizationLikelihoodMap.get(psm.getSpec()).getMod().getSiteLikelihoods();
} else {
siteLikelihoods = computePoissonBinomialLikelihood(pep, mods, dMass, allowedPoses, spec);
this.localizationLikelihoodMap.put(psm.getSpec(), new LocalizationLikelihood(dMass, siteLikelihoods));
}


// Propagate terminal AA likelihoods to each terminus //TODO this isn't going to handle cases when termini are allowed but the first residue isn't
Expand Down Expand Up @@ -778,17 +792,46 @@ private double[] localizePsm (Spectrum spec, String pep, float[] mods, float dMa
}

/**
* Computes the likelihood P(Spec_i|Pep_{ij}) of all localization sites using the Poisson Binomial model. Matched
* ions currently map to the adapted PTMiner function.
* Computes the likelihood P(Spec_i|Pep_{ij}) of all localization sites using the Poisson Binomial model.
* @param pep peptide sequence as string
* @param mods modifications on the peptides as float array
* @param allowedPoses allowed positions on the peptides as boolean array
* @param peakMzs reduced peak M/Z float array matching at least one site
* @param peakInts reduced peak intensity float array matching at least one site
* @param spec Spectrum object containing peaks
* @return likelihoods of each site as double array
*/
private double[] computePoissonBinomialLikelihood(String pep, float[] mods, float dMass, boolean[] allowedPoses,
float[] peakMzs, float[] peakInts) {
Spectrum spec) {
// First calculate the set of shifted and unshifted ions
ArrayList<Float> pepFrags = Peptide.calculatePeptideFragments(pep, mods, this.ionTypes, 1);
ArrayList<Float> shiftedPepFrags = new ArrayList<Float>(pepFrags.size());
for (Float frag : pepFrags)
shiftedPepFrags.add(frag + dMass);
pepFrags.addAll(shiftedPepFrags);

if (debugFlag)
System.out.println(pepFrags.stream().map(Object::toString)
.collect(Collectors.joining(", ")));

// Filter peakMzs and peakInts to only those that match at least one ion
float[] peakMzs = spec.getPeakMZ();
float[] peakInts = spec.getPeakInt();
float[] matchedAtLeastOneIons = findMatchedIons(pepFrags, peakMzs, peakInts)[0]; // Returns -1 if unmatched, intensity otherwise // [0] is intensities, [1] is mass errors TODO rewrite
int matchedCount = 0;
for (int i = 0; i < matchedAtLeastOneIons.length; i++) {
if (matchedAtLeastOneIons[i] > 0.0)
matchedCount++;
}
float[] reducedMzs = new float[matchedCount];
float[] reducedInts = new float[matchedCount];
int j = 0;
for (int i = 0; i < matchedAtLeastOneIons.length; i++) {
if (matchedAtLeastOneIons[i] > 0.0) {
reducedMzs[j] = peakMzs[i];
reducedInts[j] = peakInts[i];
j++;
}
}

// Set up structures to hold site matched ion probabilities
int nAllowedPoses = 0;
for (int i = 1; i < allowedPoses.length-1; i++) { // Ignore C- and N-term, will be propogated at the end
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package edu.umich.andykong.ptmshepherd.iterativelocalization;

import java.util.ArrayList;

/**
 * Holder for already-computed site likelihoods so they can be looked up instead of
 * recomputed. Each entry pairs a delta mass with its per-site likelihood array.
 */
public class LocalizationLikelihood {
    /** Stored entries; {@link #getMod()} only ever reads the first one. */
    ArrayList<Mod> mods;

    /** Creates a holder with no entries. */
    LocalizationLikelihood() {
        this.mods = new ArrayList<>();
    }

    /**
     * Creates a holder containing a single entry.
     *
     * @param dMass delta mass associated with the likelihoods
     * @param siteLikelihoods per-site likelihoods; the array is stored by reference, not copied
     */
    LocalizationLikelihood(float dMass, double[] siteLikelihoods) {
        this.mods = new ArrayList<>();
        this.mods.add(new Mod(dMass, siteLikelihoods));
    }

    /**
     * Returns the first stored entry.
     *
     * @return the entry at index 0
     * @throws IndexOutOfBoundsException if no entry has been stored (e.g. after the no-arg constructor)
     */
    public Mod getMod() { //todo variable mod searches will require multiple modifications
        return mods.get(0);
    }

    /**
     * One delta mass together with its per-site likelihoods.
     *
     * <p>Declared {@code static} because it never touches the enclosing instance; a non-static
     * inner class would carry a hidden reference to its outer object in every cached entry.
     */
    public static class Mod {
        float dMass;
        double[] siteLikelihoods;

        /**
         * @param dMass delta mass associated with the likelihoods
         * @param siteLikelihoods per-site likelihoods; stored by reference, not copied
         */
        Mod(float dMass, double[] siteLikelihoods) {
            this.dMass = dMass;
            this.siteLikelihoods = siteLikelihoods;
        }

        /**
         * @return the stored likelihood array itself (not a copy)
         */
        public double[] getSiteLikelihoods() {
            return this.siteLikelihoods;
        }
    }
}

0 comments on commit a910af3

Please sign in to comment.