Skip to content

Commit

Permalink
New Peptide decoy generation options for Peptide utility module
Browse files Browse the repository at this point in the history
Two dimensional q-value calculation for fragments
Bump to 3.0.0
  • Loading branch information
danielgeiszler committed Jan 25, 2024
1 parent 83c1a52 commit 665058e
Show file tree
Hide file tree
Showing 12 changed files with 435 additions and 57 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ java {
targetCompatibility = JavaVersion.VERSION_1_8
}

version = '2.1.0'
version = '3.0.0'

application {
// Define the main class for the application
Expand Down
2 changes: 1 addition & 1 deletion src/edu/umich/andykong/ptmshepherd/PTMShepherd.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
public class PTMShepherd {

public static final String name = "PTM-Shepherd";
public static final String version = "2.1.0";
public static final String version = "3.0.0";

static HashMap<String,String> params;
static TreeMap<String,ArrayList<String []>> datasets;
Expand Down
32 changes: 32 additions & 0 deletions src/edu/umich/andykong/ptmshepherd/core/AAMutationRules.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package edu.umich.andykong.ptmshepherd.core;

import java.util.HashMap;
import java.util.Map;

public class AAMutationRules {
public static Map<Character, Character> mutateFrom = new HashMap<>();

// These rules are taken from DIA-NN
static {
mutateFrom.put('G', 'L');
mutateFrom.put('A', 'L');
mutateFrom.put('V', 'L');
mutateFrom.put('L', 'V');
mutateFrom.put('I', 'V');
mutateFrom.put('F', 'L');
mutateFrom.put('M', 'L');
mutateFrom.put('P', 'L');
mutateFrom.put('W', 'L');
mutateFrom.put('S', 'T');
mutateFrom.put('C', 'S');
mutateFrom.put('T', 'S');
mutateFrom.put('Y', 'S');
mutateFrom.put('H', 'S');
mutateFrom.put('K', 'L');
mutateFrom.put('R', 'L');
mutateFrom.put('Q', 'N');
mutateFrom.put('E', 'D');
mutateFrom.put('N', 'Q');
mutateFrom.put('D', 'E');
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import edu.umich.andykong.ptmshepherd.core.MXMLReader;
import edu.umich.andykong.ptmshepherd.core.Spectrum;
import edu.umich.andykong.ptmshepherd.utils.Peptide;
import edu.umich.andykong.ptmshepherd.utils.StringParsingUtils;
import org.apache.commons.lang3.ArrayUtils;

import java.io.File;
import java.text.DecimalFormat;
Expand Down Expand Up @@ -151,7 +149,7 @@ private void fitMatchedIonDistribution() throws Exception {
this.matchedIonDist.addIons(matchedIonIntensities, matchedIonMassErrors, false);

// Add decoy peptide values to matched ion histogram
Peptide decoyPep = Peptide.generateDecoy(pep, mods, this.rng);
Peptide decoyPep = Peptide.generateDecoy(pep, mods, this.rng, "mutated");
ArrayList<Float> decoySitePepFrags = Peptide.calculatePeptideFragments(
decoyPep.pepSeq, decoyPep.mods, this.ionTypes, 1);
float[][] decoyMatchedIons = findMatchedIons(decoySitePepFrags, peakMzs, peakInts);
Expand All @@ -169,7 +167,7 @@ private void fitMatchedIonDistribution() throws Exception {
this.matchedIonDist.calculateIonPosterior();
if (this.printIonDistribution) {
this.matchedIonDist.printIntensityHisto("matched_ion_intensity_distribution.tsv");
this.matchedIonDist.printMassErrorHisto("matched_ion_mass_error_distribution.tsv");
//this.matchedIonDist.printMassErrorHisto("matched_ion_mass_error_distribution.tsv");
}

long t2 = System.currentTimeMillis();
Expand Down Expand Up @@ -285,17 +283,19 @@ private void calculateLocalizationProbabilities() throws Exception {
strEntropies.add(entropyToString(locEntropy));

// TODO check decoy usefulness
Peptide decoyPep = Peptide.generateDecoy(pep, mods, this.rng);
Peptide decoyPep = Peptide.generateDecoy(pep, mods, this.rng, "mutated");
boolean[] decoyAllowedPoses = parseAllowedPositions(decoyPep.pepSeq,
this.allowedAAs, decoyPep.mods);
double[] decoySiteProbs = localizePsm(spec, decoyPep.pepSeq, decoyPep.mods, dMassApex,
cBin, decoyAllowedPoses);
double decoyMaxProb = findMaxLocalizationProbability(decoySiteProbs);
String decoyMaxProbAA = findMaxLocalizationProbabilitySite(decoySiteProbs, decoyPep.pepSeq);
if (decoyMaxProb >= maxProb) {
System.out.println(specName + "\t" + pep + "\t" + decoyPep.pepSeq + "\t" +
maxProbToString(maxProb, maxProbAA) + "\t" +
maxProbToString(decoyMaxProb, decoyMaxProbAA));
if (debugFlag) {
if (decoyMaxProb >= maxProb) {
System.out.println(specName + "\t" + pep + "\t" + decoyPep.pepSeq + "\t" +
maxProbToString(maxProb, maxProbAA) + "\t" +
maxProbToString(decoyMaxProb, decoyMaxProbAA));
}
}

}
Expand Down Expand Up @@ -653,27 +653,7 @@ private double[] localizePsm (Spectrum spec, String pep, float[] mods, float dMa
// Iterate through sites to compute likelihood for each site P(Spec_i|Pep_{ij})
// There are no ions that can differentiate termini and terminal AAs, so the likelihood for each terminus
// is equal to the proximal AA
// System.out.println(spec.toString());
/**
for(int i = 0; i < pep.length(); i++) {
if (allowedPoses[i+1] == false) {
siteLikelihoods[i+1] = 0.0;
continue;
}
mods[i] += dMass;
ArrayList pepFrags = Peptide.calculatePeptideFragments(pep, mods, this.ionTypes, 1);
if (debugFlag) {
System.out.println(pep);
System.out.println("Position " + (i + 1));
System.out.println(spec.toString());
}
float[] peakMzs = spec.getPeakMZ();
float[] peakInts = spec.getPeakInt();
siteLikelihoods[i+1] = computeLikelihood(pepFrags, peakMzs, peakInts);
mods[i] -= dMass;
}
**/
//NEW FUNCTION STARTS HERE

// First calculate the set of shifted and unshifted ions
ArrayList<Float> pepFrags = Peptide.calculatePeptideFragments(pep, mods, this.ionTypes, 1);
ArrayList<Float> shiftedPepFrags = new ArrayList<Float>(pepFrags.size());
Expand Down Expand Up @@ -797,7 +777,7 @@ private double[] computePoissonBinomialLikelihood(String pep, float[] mods, floa
float[] matchedIonIntensities = matchedIons[0];
float[] matchedIonMassErrors = matchedIons[1];
// Map matched ion intensities to MatchedIonDistribution, negative intensities will be returned as -1
double[] matchedIonProbabilities = this.matchedIonDist.calcIonProbabilities(matchedIonIntensities); //TODO input mass errors here
double[] matchedIonProbabilities = this.matchedIonDist.calcIonProbabilities(matchedIonIntensities, matchedIonMassErrors); //TODO input mass errors here
if (debugFlag) {
System.out.println("Matched ions");
System.out.println(pep);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ public class MatchedIonDistribution {
float resolution;
int binMult;

TwoDimJointPMF targetPMF;
TwoDimJointPMF decoyPMF;
TwoDimJointQValue qValues;

public String mode = "ptminer-unmatched-theoretical";

public MatchedIonDistribution(float resolution, boolean poissonBinomial) {
Expand All @@ -29,8 +33,11 @@ public MatchedIonDistribution(float resolution, boolean poissonBinomial) {
this.pdfDecoy = new int[((int) (100.0 * this.binMult + 1))];
this.pdfMassError = new int[100]; // Default 100 ppm max, can make it dynamic TODO
this.pdfMassErrorDecoy = new int[100]; // Default 100 ppm max, can make it dynamic TODO
if (poissonBinomial)
if (poissonBinomial) {
this.mode = "poissonbinomial-matched-theoretical-decoy";
this.targetPMF = new TwoDimJointPMF(101, 3001, true, true);
this.decoyPMF = new TwoDimJointPMF(101, 3001, true, true);
}
}

// Original was ptminer mode
Expand Down Expand Up @@ -84,6 +91,7 @@ public void addIon(float intensity, boolean isDecoy) {
* @param massError
* @param isDecoy
*/
/**
public void addIon(float intensity, float massError, boolean isDecoy) {
if (!isDecoy) {
if (intensity > 0.0f) { // Only include matched ions, unmatched ions have negative intensity
Expand All @@ -101,6 +109,22 @@ public void addIon(float intensity, float massError, boolean isDecoy) {
}
}
}
**/
public void addIon(float intensity, float massError, boolean isDecoy) {
if (!isDecoy) {
if (intensity > 0.0f) { // Only include matched ions, unmatched ions have negative intensity
int intIndx = (int) (intensity);
int massErrorIndx = (int) (massError * 100.0);
this.targetPMF.addVal(intIndx, massErrorIndx);
}
} else {
if (intensity > 0.0f) { // Only include matched ions, unmatched ions have negative intensity
int intIndx = (int) (intensity);
int massErrorIndx = (int) (massError * 100.0);
this.decoyPMF.addVal(intIndx, massErrorIndx);
}
}
}


public void addIons(float [] intensities) {
Expand All @@ -121,19 +145,6 @@ public void addIons(float [] intensities, boolean isDecoy) {
}
}

public void addIon_Original(float intensity) {
int idx = (int) (intensity * this.binMult);
this.pdf[idx]++;
}

public void addIons_Original(float [] intensities) {
for(int i = 0; i < intensities.length; i++) {
if (intensities[i] < 0)
continue;
addIon(intensities[i]);
}
}

public void calculateCdf() {
this.cdf = new double[((int) (100.0 * this.binMult + 1))];
int sum = 0;
Expand All @@ -157,10 +168,15 @@ public void calculateCdf() {
this.cdf[this.cdf.length-1] = this.cdf[this.cdf.length-2] = lastBinsAverage;
}

/**
public void calculateIonPosterior() {
calculateIonIntensityPosterior();
calculateIonMassErrorPosterior();
}
**/
public void calculateIonPosterior() {
this.qValues = new TwoDimJointQValue(this.targetPMF, this.decoyPMF);
}

private void calculateIonIntensityPosterior() {
// Calculate local q-val estimate //todo terminology
Expand Down Expand Up @@ -271,6 +287,29 @@ public double[] calcIonProbabilities(float [] intensities) {
return probs;
}

public double calcIonProbability(float intensity, float massError) {
double prob;
if (intensity < 0)
prob = -1;
else {
if (!this.mode.equals("poissonbinomial-matched-theoretical-decoy")) {
prob = this.cdf[(int) intensity * this.binMult];
} else {
int intIndx = (int) intensity;
int massErrorIndx = (int) massError;
prob = 1 - this.qValues.getQVal(intIndx, massErrorIndx);
}
}
return prob;
}

public double[] calcIonProbabilities(float [] intensities, float[] massErrors) {
double[] probs = new double[intensities.length];
for (int i = 0; i < intensities.length; i++)
probs[i] = calcIonProbability(intensities[i], massErrors[i]);
return probs;
}

public String intensityToString() {
StringBuffer sb = new StringBuffer();

Expand All @@ -281,11 +320,7 @@ public String intensityToString() {
+ "\t" + this.pdf[i] + "\n");
}
} else {
sb.append("intensity\tcount_target\tcount_decoy\tion_PEP\n");
for (int i = 0; i < this.pdf.length; i++) {
sb.append(new DecimalFormat("0.00").format((double) i / (double) this.binMult)
+ "\t" + this.pdf[i] + "\t" + this.pdfDecoy[i] + "\t" + this.ionPosterior[i] + "\n");
}
sb.append(this.qValues.toString());
}
return sb.toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public static double calculateProbXGreaterThanY(double[] xIonProbs, double[] yIo
nMatchedIonsX++;
if (yIonProbs[i] > 0.0)
nMatchedIonsY++;
if (xIonProbs[i] > 0.0 && yIonProbs[i] > 0.0)
if (xIonProbs[i] > 0.0 && (xIonProbs[i] == yIonProbs[i]))
nSharedIons++;
}
// Set up arrays to be convolved
Expand All @@ -150,7 +150,6 @@ public static double calculateProbXGreaterThanY(double[] xIonProbs, double[] yIo

// Construct Binomial Poisson parameters for X and Y
// Dimensions are [nIons][2], [i][0] value holds chance of 0 (fail); [i][1] holds chance of 1 (success)
// I am very proud of this logic... it does max 2 checks for redundancy and a match to either X or Y
int xIonPoint = 0;
int yIonPoint = 0;
for (int i = 0; i < xIonProbs.length; i++) {
Expand All @@ -160,7 +159,8 @@ public static double calculateProbXGreaterThanY(double[] xIonProbs, double[] yIo
xBinPoiParams[xIonPoint][0] = 1.0 - xIonProbs[i]; // chance of fail
xBinPoiParams[xIonPoint][1] = xIonProbs[i]; // chance of success
xIonPoint++;
} else {
}
if (yIonProbs[i] > 0.0){
yBinPoiParams[yIonPoint][0] = 1.0 - yIonProbs[i]; // chance of fail
yBinPoiParams[yIonPoint][1] = yIonProbs[i]; // chance of success
yIonPoint++;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package edu.umich.andykong.ptmshepherd.iterativelocalization;

public class TwoDimJointCMF {
double probGrid[][];
int countGrid[][];
int nVals;
TwoDimJointPMF PMF;
boolean direction1;
boolean direction2;

public TwoDimJointCMF(TwoDimJointPMF PMF, boolean direction1, boolean direction2) {
this.probGrid = new double[PMF.getXDim()][PMF.getYDim()];
this.countGrid = new int[PMF.getXDim()][PMF.getYDim()];
this.PMF = PMF;
this.nVals = PMF.nVals;
this.direction1 = direction1;
this.direction2 = direction2;

calculateCMF();
}

private void calculateCMF() {
// First two loops are to calculate this for every value
for (int i1 = 0; i1 < getXDim(); i1++) {
for (int j1 = 0; j1 < getYDim(); j1++) {
int count = 0; // Number of vals to be integrated
// These two loops are the values that will be integrated
for (int i2 = 0; i2 <= i1; i2++) { // Sum values up to X (intensity)
for (int j2 = j1; j2 >= 0; j2--) { // Sum values great than Y (mass error)
count++;
}
}
this.countGrid[i1][j1] = count;
this.probGrid[i1][j1] = (double) count / (double) this.nVals;
}
}
}

public int getXDim() {
return this.countGrid.length;
}

public int getYDim() {
return this.countGrid[0].length;
}

public String toString() {
StringBuffer sb = new StringBuffer();

sb.append("count grid");
for (int j = 0; j < getXDim(); j++) {
sb.append(j + ".\t");
}
sb.append("\n");
for (int i = 0; i < getYDim(); i++) {
sb.append(i + ".\t");
for (int j = 0; j < getXDim(); j++)
sb.append(String.format("%d\t", this.countGrid[j][i]));
sb.append("\n");
}
sb.append("\t");
sb.append("\n");

sb.append("prob grid\n");
for (int i = 0; i < getYDim(); i++) {
sb.append(i + ".\t");
for (int j = 0; j < getXDim(); j++)
sb.append(String.format("%.2f\t", this.probGrid[j][i]));
sb.append("\n");
}
sb.append("\t");
for (int j = 0; j < getXDim(); j++) {
sb.append(j + ".\t\t");
}
sb.append("\n");
return sb.toString();
}


public static void main(String args[]) {
TwoDimJointPMF tmpPMF = new TwoDimJointPMF(2,4,true,true);
tmpPMF.addVal(0, 0);
tmpPMF.addVal(0, 1);
tmpPMF.addVal(0, 2);
tmpPMF.addVal(0, 3);
tmpPMF.addVal(1, 0);
tmpPMF.addVal(1, 1);
tmpPMF.addVal(1, 2);
tmpPMF.addVal(1, 3);

TwoDimJointCMF tmpCMF = new TwoDimJointCMF(tmpPMF, true, true);

System.out.println(tmpCMF);

}
}
Loading

0 comments on commit 665058e

Please sign in to comment.