Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MASST XIC #572

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions search_single_spectrum/test/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import sys

# Make the xic_masst module importable. The path is relative, so it is
# resolved against the current working directory at import time.
sys.path.insert(0, "../tools/search_single_spectrum")

def test():
    """Run the MASST XIC processing on the example table and sanity-check it.

    The input path is relative to the current working directory. The call
    goes through ``xic_masst.process_masst_xic``, which fetches the listed
    data files over FTP, so this test needs network access.
    """
    # Imported lazily so that the sys.path tweak at module level is in
    # effect before the module is looked up.
    import xic_masst

    new_record_df = xic_masst.process_masst_xic(
        "test_data/SEARCH_SINGLE_SPECTRUM-7e961554-view_all_spectra_datasets_matched-main.tsv"
    )

    print(new_record_df)

    # Printing alone can never fail; check that rows came back and that the
    # columns derived by process_masst_xic are present.
    assert len(new_record_df) > 0
    for column in ("precursor_mz", "precursor_i", "rt", "xic_sum"):
        assert column in new_record_df.columns
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
cluster_scan dataset_id filename filescan metadata
295334 MSV000084900 f.MSV000084900/ccms_peak/Global_Foodomics_composite_data_2020/MSV000083010_core/G74022_5x_BG11_01_18395.mzML 574
295334 MSV000084900 f.MSV000084900/ccms_peak/Global_Foodomics_composite_data_2020/MSV000083010_core/G74022_5x_BG11_01_18395.mzML 570
300 MSV000084237 f.MSV000084237/ccms_peak/Raw/std_mix3_Fe_conc46_1_long.mzML 840
300 MSV000084237 f.MSV000084237/ccms_peak/Raw/std_mix2_4.mzML 947
300 MSV000084237 f.MSV000084237/ccms_peak/Raw/std_mix2_4.mzML 941
300 MSV000084237 f.MSV000084237/ccms_peak/Raw/std_mix2_4.mzML 938
131379 MSV000083010 f.MSV000083010/ccms_peak/Samples/G74022_5x_BG11_01_18395.mzML 574 GNPS - GFOP - Food - Beverage - Data set 1|not applicable|not applicable|reverse phase (C18)|not applicable|United States of America|not applicable|not applicable|not specified|not applicable|not applicable|sulfamethazine;sulfadimethoxine|electrospray ionization (positive)|not specified|not applicable|impact HD|MS:1002667|not specified|not specified|solid material, frozen|ethanol-water (19:1)|food|food_source_animal|G74022|not applicable|not applicable|not applicable|MSV000083010_G74022|2018
131379 MSV000083010 f.MSV000083010/ccms_peak/Samples/G74022_5x_BG11_01_18395.mzML 570 GNPS - GFOP - Food - Beverage - Data set 1|not applicable|not applicable|reverse phase (C18)|not applicable|United States of America|not applicable|not applicable|not specified|not applicable|not applicable|sulfamethazine;sulfadimethoxine|electrospray ionization (positive)|not specified|not applicable|impact HD|MS:1002667|not specified|not specified|solid material, frozen|ethanol-water (19:1)|food|food_source_animal|G74022|not applicable|not applicable|not applicable|MSV000083010_G74022|2018
132950 MSV000082074 f.MSV000082074/ccms_peak/Samples/G74022_5x_BG11_01_18395.mzML 574
132950 MSV000082074 f.MSV000082074/ccms_peak/Samples/G74022_5x_BG11_01_18395.mzML 570
18313 MSV000081936 f.MSV000081936/ccms_peak/animal/G74022_5x_BG11_01_18395.mzML 574
18313 MSV000081936 f.MSV000081936/ccms_peak/animal/G74022_5x_BG11_01_18395.mzML 570
5636 MSV000081657 f.MSV000081657/ccms_peak/data_mzxml/Samples/G74022_5x_BG11_01_18395.mzML 574
5636 MSV000081657 f.MSV000081657/ccms_peak/data_mzxml/Samples/G74022_5x_BG11_01_18395.mzML 570
3558643 MSV000080673 f.MSV000080673/ccms_peak/2017.AmericanGut3K.mzXMLfiles/Samples/000036206_RC2_01_5354.mzML 763 GNPS_AmericanGut3K_dataset|14|female|reverse phase (C18)|no disease reported|United States of America|disease NOS|disease NOS|5.8|unhealthy (NOS)|Urban|sulfamethizole;sulfachloropyridazine|electrospray ionization (positive)|40.7|-74|Adolescence (8 yrs < x <= 18 yrs)|impact HD|MS:1002667|9606|Homo sapiens|10/10/2015 13:15|swabs, dry|ethanol-water (9:1)|animal|biofluid|36206|not applicable|feces|UBERON:0001988|MSV000080673_36206|2017
3558643 MSV000080673 f.MSV000080673/ccms_peak/2017.AmericanGut3K.mzXMLfiles/Samples/000036206_RC2_01_5354.mzML 760 GNPS_AmericanGut3K_dataset|14|female|reverse phase (C18)|no disease reported|United States of America|disease NOS|disease NOS|5.8|unhealthy (NOS)|Urban|sulfamethizole;sulfachloropyridazine|electrospray ionization (positive)|40.7|-74|Adolescence (8 yrs < x <= 18 yrs)|impact HD|MS:1002667|9606|Homo sapiens|10/10/2015 13:15|swabs, dry|ethanol-water (9:1)|animal|biofluid|36206|not applicable|feces|UBERON:0001988|MSV000080673_36206|2017
3558643 MSV000080673 f.MSV000080673/ccms_peak/2017.AmericanGut3K.mzXMLfiles/Samples/000011075_RH2_01_6313.mzML 845 GNPS_AmericanGut3K_dataset|44|female|reverse phase (C18)|no disease reported|United States of America|disease NOS|disease NOS|17.3|unhealthy (NOS)|Urban|sulfamethizole;sulfachloropyridazine|electrospray ionization (positive)|37|-122|Early Adulthood (18 yrs < x <= 45 yrs)|impact HD|MS:1002667|9606|Homo sapiens|1/4/2016 9:00|swabs, dry|ethanol-water (9:1)|animal|biofluid|11075|not applicable|feces|UBERON:0001988|MSV000080673_11075|2017
3558643 MSV000080673 f.MSV000080673/ccms_peak/2017.AmericanGut3K.mzXMLfiles/Samples/000011075_RH2_01_6313.mzML 840 GNPS_AmericanGut3K_dataset|44|female|reverse phase (C18)|no disease reported|United States of America|disease NOS|disease NOS|17.3|unhealthy (NOS)|Urban|sulfamethizole;sulfachloropyridazine|electrospray ionization (positive)|37|-122|Early Adulthood (18 yrs < x <= 45 yrs)|impact HD|MS:1002667|9606|Homo sapiens|1/4/2016 9:00|swabs, dry|ethanol-water (9:1)|animal|biofluid|11075|not applicable|feces|UBERON:0001988|MSV000080673_11075|2017
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn15_CL21_F11.mzXML 721
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn15_CL21_F07.mzXML 749
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn15_CL21_F07.mzXML 709
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn12_CL21_F03.mzXML 753
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn12_CL21_F03.mzXML 711
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn12_CL21_F03.mzXML 673
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn12_CL21_E11.mzXML 730
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn12_CL21_E11.mzXML 691
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn11_CL21_E07.mzXML 705
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn11_CL21_E03.mzXML 737
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn11_CL21_E03.mzXML 699
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D11.mzXML 753
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D11.mzXML 713
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D11.mzXML 675
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D08.mzXML 726
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D07.mzXML 744
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn10_Bsubt_D07.mzXML 700
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn08_Bsubt_D03.mzXML 728
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn08_Bsubt_D03.mzXML 688
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn08_Bsubt_C11.mzXML 737
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn08_Bsubt_C11.mzXML 698
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn05_control_B11.mzXML 697
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn05_control_B07.mzXML 728
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn05_control_B07.mzXML 683
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn04_control_B03.mzXML 733
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn04_control_B03.mzXML 693
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn04_control_B02.mzXML 699
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn02_control_A07.mzXML 706
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn02_control_A07.mzXML 666
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn02_control_A03.mzXML 733
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn02_control_A03.mzXML 692
22273 MSV000079888 f.MSV000079888/ccms_peak/mzXML/Corn02_control_A03.mzXML 649
104 changes: 104 additions & 0 deletions search_single_spectrum/tools/search_single_spectrum/xic_masst.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env python

import os
import pymzml
import pandas as pd
import shutil
import urllib.request as request
from contextlib import closing



def process_masst_xic(input_filename):
    """Annotate MASST match records with precursor data and an XIC sum.

    Reads the tab-separated table at ``input_filename``. For every row, the
    data file named in the ``filename`` column is downloaded from
    ``ftp://massive.ucsd.edu/`` into the current directory, the spectrum
    whose ID equals the ``filescan`` column is looked up, and the
    intensities of all MS1 peaks matching its precursor m/z within
    +/- 0.1 minutes of its scan time are summed.

    Each record gains the keys ``precursor_mz``, ``precursor_i``, ``rt`` and
    ``xic_sum``. All four are ``None`` when the scan is not present in the
    file or carries no selected precursor.

    Args:
        input_filename: path of the tab-separated input table.

    Returns:
        pandas.DataFrame built from the annotated records.
    """
    df = pd.read_csv(input_filename, sep="\t")

    ms2_records = df.to_dict(orient="records")
    for record in ms2_records:
        # Making data file available; the first two characters of the
        # filename column are dropped to form the FTP path.
        ftp_url = "ftp://massive.ucsd.edu/" + record["filename"][2:]
        local_filename = os.path.basename(record["filename"])

        try:
            with closing(request.urlopen(ftp_url)) as r:
                with open(local_filename, 'wb') as f:
                    shutil.copyfileobj(r, f)

            precursor = _find_precursor(local_filename, record["filescan"])
            if precursor is None:
                # Keep the row but mark the derived fields as missing
                # instead of failing the whole batch on a lookup error.
                print("Scan {} not usable in {}".format(
                    record["filescan"], local_filename))
                record["precursor_mz"] = None
                record["precursor_i"] = None
                record["rt"] = None
                record["xic_sum"] = None
            else:
                precursor_mz, precursor_i, rt = precursor
                record["precursor_mz"] = precursor_mz
                record["precursor_i"] = precursor_i
                record["rt"] = rt

                # Perform XIC
                record["xic_sum"] = _sum_xic_intensity(
                    local_filename, precursor_mz, rt - 0.1, rt + 0.1
                )

            print(record)
        finally:
            # Always clean up the download, even when parsing fails; the
            # file may not exist if the download itself raised.
            if os.path.exists(local_filename):
                os.remove(local_filename)

    return pd.DataFrame(ms2_records)


def _find_precursor(local_filename, target_scan):
    """Return (precursor m/z, precursor intensity, scan time in minutes) for
    the spectrum whose ID matches ``target_scan``, or None if unavailable."""
    run = pymzml.run.Reader(local_filename)
    wanted_id = str(target_scan)

    for spectrum in run:
        if str(spectrum.ID) != wanted_id:
            continue

        selected_precursors = spectrum.selected_precursors
        if not selected_precursors:
            return None

        precursor_dict = selected_precursors[0]
        # NOTE(review): assumes pymzml may omit the "i" key when the file
        # carries no precursor intensity; .get() yields None in that case.
        return (
            precursor_dict["mz"],
            precursor_dict.get("i"),
            spectrum.scan_time_in_minutes(),
        )

    return None


def _sum_xic_intensity(local_filename, target_mz, lower_rt, upper_rt):
    """Sum the intensities of MS1 peaks matching ``target_mz`` whose scan
    time lies within [lower_rt, upper_rt] (minutes)."""
    run = pymzml.run.Reader(local_filename, MS_precisions={1: 5e-6, 2: 20e-6})
    total_intensity = 0

    for spectrum in run:
        spectrum_rt = float(spectrum.scan_time_in_minutes())
        if spectrum_rt < lower_rt or spectrum_rt > upper_rt:
            continue

        if spectrum.ms_level != 1:
            continue

        # has_peak yields (mz, intensity) pairs; only the intensity belongs
        # in the chromatogram sum.
        for _mz, intensity in spectrum.has_peak(target_mz):
            total_intensity += intensity

    return total_intensity


def main():
    """
    Print an extracted ion chromatogram (XIC/EIC) for one fixed m/z.

    Opens ``tests/data/example.mzML`` under the parent of this module's
    directory, gathers every peak that ``has_peak`` reports for the target
    m/z on MS1 spectra, and prints scan time, intensity and m/z as
    tab-separated columns below a header line.
    """
    module_dir = os.path.dirname(__file__)
    mzml_path = os.path.join(
        module_dir, os.pardir, "tests", "data", "example.mzML"
    )
    reader = pymzml.run.Reader(mzml_path)

    followed_mz = 70.06575775
    xic_rows = []

    for scan in reader:
        # Only MS1 spectra contribute to the chromatogram.
        if scan.ms_level != 1:
            continue

        hits = scan.has_peak(followed_mz)
        if hits == []:
            continue

        for peak_mz, peak_intensity in hits:
            xic_rows.append(
                [scan.scan_time_in_minutes(), peak_intensity, peak_mz]
            )

    print("RT \ti \tmz")
    for row_rt, row_intensity, row_mz in xic_rows:
        print("{0:5.3f}\t{1:13.4f}\t{2:10}".format(row_rt, row_intensity, row_mz))


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
3 changes: 2 additions & 1 deletion workflow-integration-misc-tests/test_tasks.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ task_id,description,regressioncountviews
adc3ec9194de43f7b97a61369bf2d7f6,ms2lda,
73da384ea02a4e8ca3edd82649e540c3,msms-chooser,
1df2ea6e406447dfaa7a8b3ea23bf9e1,batchvalidator,new_annotations
2b7b4e189177498f8f95ea217ff351ff,lc_mzmine2 small test,
2b7b4e189177498f8f95ea217ff351ff,lc_mzmine2 small test,
3484239c925b46dda8c1bf5a7e6d592b,sirius,compound_identifications_summary;compound_formula_summary