From 1018e72cbfd1f95ba6071093b8525792d3f4c3b5 Mon Sep 17 00:00:00 2001 From: danielgeiszler Date: Fri, 12 Apr 2024 00:50:48 +0300 Subject: [PATCH] Only read mzbin files on first and last epochs when doing iterative localization This commit results in a 43% speedup to IterativeLocalizer.calculateLocalizationProbabilities --- .../IterativeLocalizer.java | 56 +++++-------------- 1 file changed, 15 insertions(+), 41 deletions(-) diff --git a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java index 00b7a02..ed8180e 100644 --- a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java +++ b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java @@ -228,7 +228,8 @@ private void calculateLocalizationProbabilities() throws Exception { for (String cf : runToLine.keySet()) { // Load current run mr = new MXMLReader(mzMap.get(ds).get(cf), this.nThreads); - mr.readFully(); + if (epoch == 1 || finalPass) //todo logic too complicated, need to create a state machine + mr.readFully(); // Calculate PSM-level localization probabilities for (int j : runToLine.get(cf)) { @@ -271,10 +272,14 @@ private void calculateLocalizationProbabilities() throws Exception { if (!finalPass && this.priorProbs[cBin].getIsConverged()) // Safe because left is first continue; - Spectrum spec = mr.getSpectrum(specName); - if (spec == null) { - linesWithoutSpectra.add(specName); - continue; + // todo this logic is getting way too complex, need to handle execution states in a static context + Spectrum spec = null; + if (epoch == 1 || finalPass) { // or out of memory dataset + spec = mr.getSpectrum(specName); + if (spec == null) { + linesWithoutSpectra.add(specName); + continue; // todo handle this error + } } //if (specName.equals("02330a_GC1_3990_03_PTM_TrainKit_Rmod_Dimethyl_asymm_200fmol_3xHCD_R1.15210.15210")) { @@ -669,7 +674,8 @@ boolean isDecoyAA(char aa) { * P(Spec_i|Pep_{ij}) -> Likelihood * Sum_{k=0}^{{L_i}+1} P(Pep_{ik})*P(Spec_i|Pep_{ik}) -> Marginal probability * - * @param spec Spectrum class opject containing pre-process mass spectrum + * @param psm PSMFile.PSM object containing PSM information //todo most of the other values don't need to be preparsed if this is passed + * @param spec Spectrum class object containing pre-processed mass spectrum * @param pep pep sequence * @param mods array containing masses to be added on to pep sequence at mods[i] position * @param dMass delta mass of PSM @@ -689,41 +695,6 @@ private double[] localizePsm (PSMFile.PSM psm, Spectrum spec, String pep, float[ else sitePriorProbs = this.priorProbs[cBin].computePriorProbs(pep, allowedPoses); - // Iterate through sites to compute likelihood for each site P(Spec_i|Pep_{ij}) - // There are no ions that can differentiate termini and terminal AAs, so the likelihood for each terminus - // is equal to the proximal AA - - // First calculate the set of shifted and unshifted ions - ArrayList pepFrags = Peptide.calculatePeptideFragments(pep, mods, this.ionTypes, 1); - ArrayList shiftedPepFrags = new ArrayList(pepFrags.size()); - for (Float frag : pepFrags) - shiftedPepFrags.add(frag + dMass); - pepFrags.addAll(shiftedPepFrags); - - if (debugFlag) - System.out.println(pepFrags.stream().map(Object::toString) - .collect(Collectors.joining(", "))); - - // Filter peakMzs and peakInts to only those that match at least one ion - float[] peakMzs = spec.getPeakMZ(); - float[] peakInts = spec.getPeakInt(); - float[] matchedIons = findMatchedIons(pepFrags, peakMzs, peakInts)[0]; // Returns -1 if unmatched, intensity otherwise // [0] is intensities, [1] is mass errors TODO rewrite - int matchedCount = 0; - for (int i = 0; i < matchedIons.length; i++) { - if (matchedIons[i] > 0.0) - matchedCount++; - } - float[] reducedMzs = new float[matchedCount]; - float[] reducedInts = new float[matchedCount]; - int j = 0; - for (int i = 0; i < matchedIons.length; i++) { - if (matchedIons[i] > 0.0) { - reducedMzs[j] = peakMzs[i]; - reducedInts[j] = peakInts[i]; - j++; - } - } - // Iterate through sites to compute likelihood for each site P(Spec_i|Pep_{ij}) // There are no ions that can differentiate termini and terminal AAs, so the likelihood for each terminus // is equal to the proximal AA @@ -745,6 +716,9 @@ private double[] localizePsm (PSMFile.PSM psm, Spectrum spec, String pep, float[ } **/ + // Iterate through sites to compute likelihood for each site P(Spec_i|Pep_{ij}) + // There are no ions that can differentiate termini and terminal AAs, so the likelihood for each terminus + // is equal to the proximal AA // Check to see if the likelihood has already been computed. If it has, grab it. If not, compute it. if (this.localizationLikelihoodMap.containsKey(psm.getSpec())) { siteLikelihoods = this.localizationLikelihoodMap.get(psm.getSpec()).getMod().getSiteLikelihoods();