Skip to content

[XIC Batch] First Version of XIC Batch Workflow #680

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions xicbatch/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Deployment settings for the xicbatch workflow.
# The two shared makefiles included below are not visible from this file;
# presumably they consume the variables defined here -- TODO confirm.
include ../Makefile.credentials
include ../Makefile.deploytemplate

# Workflow name and tool folder name (both "xicbatch"), followed by the
# version tag and the description string.
WORKFLOW_NAME=xicbatch
TOOL_FOLDER_NAME=xicbatch
WORKFLOW_VERSION=release_28
WORKFLOW_DESCRIPTION="xicbatch"
136 changes: 136 additions & 0 deletions xicbatch/tools/xicbatch/calculate_xic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import os
import sys
import numpy as np
import pandas as pd

import argparse
import uuid
import glob
import shutil
from scipy import integrate

def calculate_xic(filename, mz, rt, mz_tolerance, rt_min, rt_max, msaccess_path, feature_name):
    """Extract an MS1 ion chromatogram for one m/z window from one file.

    Runs the external msaccess executable with its ``tic`` exporter limited to
    ``[mz - mz_tolerance, mz + mz_tolerance]``, MS level 1 and the requested
    scan-time window, then reads the tab-separated table msaccess writes.

    Args:
        filename: Path of the spectrum file handed to msaccess.
        mz: Centre of the m/z window.
        rt: Not used by this function; kept for signature compatibility.
        mz_tolerance: Half-width of the m/z window.
        rt_min: Lower scan-time bound. It is multiplied by 60 before being
            passed to msaccess (presumably minutes -> seconds; confirm).
        rt_max: Upper scan-time bound, handled like ``rt_min``.
        msaccess_path: Path of the msaccess executable.
        feature_name: Not used by this function; kept for compatibility.

    Returns:
        pandas.DataFrame with columns ``rt`` (the msaccess ``rt`` column
        divided by 60) and ``int`` (the msaccess ``sumIntensity`` column).

    Raises:
        RuntimeError: If msaccess did not write any output file.
    """
    # Local import: the module header of this file does not import subprocess.
    import subprocess

    # msaccess writes into this scratch folder, relative to the working dir.
    temp_result_folder = str(uuid.uuid4())

    mz_lower = mz - mz_tolerance
    mz_upper = mz + mz_tolerance

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters can no longer break (or inject into) the command.
    command = [
        msaccess_path, filename,
        "-o", temp_result_folder,
        "-x", "tic mz={},{} delimiter=tab".format(mz_lower, mz_upper),
        "--filter", "msLevel 1",
        "--filter", "scanTime [{},{}]".format(rt_min * 60, rt_max * 60),
    ]
    print(" ".join(command), mz_lower, mz_upper, mz_tolerance)

    try:
        # LC_ALL=C mirrors the "export LC_ALL=C" of the former shell command.
        completed = subprocess.run(command, env=dict(os.environ, LC_ALL="C"))

        result_files = sorted(glob.glob(os.path.join(temp_result_folder, "*")))
        if not result_files:
            raise RuntimeError(
                "msaccess produced no XIC output for {} (exit code {})".format(
                    filename, completed.returncode))

        # The first line of the msaccess table is skipped; the second holds
        # the column names.
        result_df = pd.read_csv(result_files[0], sep="\t", skiprows=1)
    finally:
        # Always remove the scratch folder, even when extraction failed.
        shutil.rmtree(temp_result_folder, ignore_errors=True)

    xic_df = pd.DataFrame()
    xic_df["rt"] = result_df["rt"] / 60.0
    xic_df["int"] = result_df["sumIntensity"]

    return xic_df

def calculate_ms2(filename, mz, rt, mz_tolerance, rt_min, rt_max, msaccess_path, feature_name):
    """List the MS2 scans of one file whose precursor matches an m/z window.

    Runs the external msaccess executable with its ``spectrum_table`` exporter,
    filtered to MS level 2, precursor ``mz`` within ``mz_tolerance`` Da and the
    requested scan-time window, then reformats the table msaccess writes.

    Args:
        filename: Path of the spectrum file handed to msaccess.
        mz: Precursor m/z to match.
        rt: Not used by this function; kept for signature compatibility.
        mz_tolerance: Precursor tolerance, passed to msaccess as ``mzTol`` in Da.
        rt_min: Lower scan-time bound. It is multiplied by 60 before being
            passed to msaccess (presumably minutes -> seconds; confirm).
        rt_max: Upper scan-time bound, handled like ``rt_min``.
        msaccess_path: Path of the msaccess executable.
        feature_name: Not used by this function; kept for compatibility.

    Returns:
        pandas.DataFrame with columns ``scan`` (text after the last "." of the
        msaccess ``id``), ``filename`` (base name of ``filename``), ``rt``
        (msaccess ``rt`` divided by 60), ``tic`` and ``precursorMZ``.

    Raises:
        RuntimeError: If msaccess did not write any output file.
    """
    # Local import: the module header of this file does not import subprocess.
    import subprocess

    # msaccess writes into this scratch folder, relative to the working dir.
    temp_result_folder = str(uuid.uuid4())

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters can no longer break (or inject into) the command.
    command = [
        msaccess_path, filename,
        "-o", temp_result_folder,
        "-x", "spectrum_table delimiter=tab",
        "--filter", "mzPrecursors [{}] mzTol={} Da".format(mz, mz_tolerance),
        "--filter", "msLevel 2",
        "--filter", "scanTime [{},{}]".format(rt_min * 60, rt_max * 60),
    ]

    try:
        # LC_ALL=C mirrors the "export LC_ALL=C" of the former shell command.
        completed = subprocess.run(command, env=dict(os.environ, LC_ALL="C"))

        result_files = sorted(glob.glob(os.path.join(temp_result_folder, "*")))
        if not result_files:
            raise RuntimeError(
                "msaccess produced no MS2 output for {} (exit code {})".format(
                    filename, completed.returncode))

        # Read "id" as text: when every id is purely numeric pandas would
        # otherwise infer a number and the string split below would fail.
        result_df = pd.read_csv(result_files[0], sep="\t", skiprows=1,
                                dtype={"id": str})
    finally:
        # Always remove the scratch folder, even when extraction failed.
        shutil.rmtree(temp_result_folder, ignore_errors=True)

    formatted_df = pd.DataFrame()
    # Keep only the part of the id after its last "."
    formatted_df["scan"] = result_df["id"].str.split(".").str[-1]
    formatted_df["filename"] = os.path.basename(filename)
    formatted_df["rt"] = result_df["rt"] / 60.0
    formatted_df["tic"] = result_df["TIC"]
    formatted_df["precursorMZ"] = result_df["precursorMZ"]

    print(formatted_df)

    return formatted_df


def main():
    """Extract one XIC (and matching MS2 scans) from every file in a folder.

    For each file in ``input_folder`` this calls ``calculate_xic`` and
    ``calculate_ms2`` with the m/z and retention-time window given on the
    command line, then writes three tab-separated files:

    * ``output_results``: one summary row per file (integrated area, apex
      retention time and intensity, name of the per-file drawing).
    * ``extraction_results``: the XIC points of all files, tagged with
      ``query`` and ``filename``.
    * ``ms2_extraction_results``: the MS2 scans of all files, tagged with
      ``query``; only a header row when no MS2 scan was found.

    Raises:
        ValueError: If ``input_folder`` contains no files.
    """
    parser = argparse.ArgumentParser(description='Creating XIC')
    parser.add_argument('input_folder', help='folder containing the input spectrum files')
    parser.add_argument('output_results', help='output TSV with one summary row per input file')
    parser.add_argument('extraction_results', help='output TSV with the XIC points of all files')
    parser.add_argument('ms2_extraction_results', help='output TSV with the matching MS2 scans')
    parser.add_argument('msaccess_path', help='path of the msaccess executable')
    # These four were optional with default None, but the run always crashed
    # on float(None) without them; argparse now reports the problem up front.
    parser.add_argument('--mz', type=float, required=True, help='target m/z')
    parser.add_argument('--rt', type=float, required=True, help='target retention time')
    parser.add_argument('--mztol', type=float, required=True, help='m/z tolerance')
    parser.add_argument('--rttol', type=float, required=True, help='retention time tolerance')

    args = parser.parse_args()

    all_input_files = sorted(glob.glob(os.path.join(args.input_folder, "*")))
    if not all_input_files:
        # pd.concat([]) further down would raise a much less helpful ValueError.
        raise ValueError("No input files found in {}".format(args.input_folder))

    # Loop-invariant values, computed once.
    mz = args.mz
    rt = args.rt
    rt_min = rt - args.rttol
    rt_max = rt + args.rttol
    query = "{}_{}".format(mz, rt)

    # scipy.integrate.trapz is gone from recent SciPy releases; fall back to
    # it only when the newer name is missing.
    trapezoid = getattr(integrate, "trapezoid", None) or integrate.trapz

    output_list = []
    output_full_xic = []
    output_ms2 = []
    for filename in all_input_files:
        base_name = os.path.basename(filename)

        xic_df = calculate_xic(filename, mz, rt, args.mztol, rt_min, rt_max,
                               args.msaccess_path, str(mz))

        # MS2 extraction stays best-effort, but failures are now reported
        # instead of being swallowed silently.
        try:
            formatted_df = calculate_ms2(filename, mz, rt, args.mztol, rt_min, rt_max,
                                         args.msaccess_path, str(mz))
        except Exception as error:
            print("MS2 extraction failed for {}: {}".format(base_name, error))
        else:
            formatted_df["query"] = query
            output_ms2.append(formatted_df)

        xic_df["query"] = query
        xic_df["filename"] = base_name

        integration_value = trapezoid(xic_df["int"], x=xic_df["rt"])

        # Most intense point first; the exported XIC keeps this ordering.
        xic_df = xic_df.sort_values(by=['int'], ascending=False)
        if xic_df.empty:
            # No MS1 point in the window: record NaN instead of crashing.
            max_int_rt = float("nan")
            max_int = float("nan")
        else:
            max_int_rt = xic_df["rt"].iloc[0]
            max_int = xic_df["int"].iloc[0]

        output_list.append({
            "filename": base_name,
            "integration_value": integration_value,
            "mz": mz,
            "rt": rt,
            "max_int_rt": max_int_rt,
            "max_int": max_int,
            "drawing": "{}_{}_{}.png".format(base_name, mz, rt),
        })
        output_full_xic.append(xic_df)

    results_df = pd.DataFrame(output_list)
    results_df.to_csv(args.output_results, sep="\t", index=False)

    pd.concat(output_full_xic).to_csv(args.extraction_results, sep='\t', index=False)

    if output_ms2:
        ms2_df = pd.concat(output_ms2)
    else:
        # Header-only table so that readers of this file still find columns.
        ms2_df = pd.DataFrame(
            columns=["scan", "filename", "rt", "tic", "precursorMZ", "query"])
    ms2_df.to_csv(args.ms2_extraction_results, sep='\t', index=False)


if __name__ == "__main__":
    main()
32 changes: 32 additions & 0 deletions xicbatch/tools/xicbatch/demangle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
import sys
import pandas as pd

import argparse
import ming_proteosafe_library

def main():
    """Add the full CCMS path of every row's file to a results table.

    Reads the tab-separated ``input_results`` table, looks up each value of
    its ``filename`` column in the mangled-file mapping obtained from the
    workflow parameters file, stores the result in a new ``full_ccms_path``
    column and writes the table to ``output_results``.

    An input file without any header (as written upstream when no MS2 scan
    was found) yields a header-only output instead of a crash.

    Raises:
        KeyError: If a filename is missing from the mapping.
    """
    parser = argparse.ArgumentParser(description='Creating Demangling')
    parser.add_argument('input_results', help='input TSV with a "filename" column')
    parser.add_argument('output_results', help='output TSV with the added "full_ccms_path" column')
    parser.add_argument('params', help='workflow parameters XML file')
    args = parser.parse_args()

    # Close the parameters file deterministically instead of leaking the handle.
    with open(args.params) as params_file:
        params_dict = ming_proteosafe_library.parse_xml_file(params_file)
    mangled_mapping = ming_proteosafe_library.get_mangled_file_mapping(params_dict)

    try:
        results_df = pd.read_csv(args.input_results, sep="\t")
    except pd.errors.EmptyDataError:
        # Nothing to demangle; keep the schema so the output stays readable.
        results_df = pd.DataFrame(columns=["filename"])

    # Direct indexing (not .get) so an unknown filename still raises KeyError.
    results_df["full_ccms_path"] = [
        mangled_mapping[filename] for filename in results_df["filename"]
    ]

    results_df.to_csv(args.output_results, sep="\t", index=False)


if __name__ == "__main__":
    main()
56 changes: 56 additions & 0 deletions xicbatch/tools/xicbatch/draw_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import os
import sys
import pandas as pd

import argparse
import ming_proteosafe_library

from plotnine import *


def main():
    """Draw the extracted chromatograms as PNG files.

    Reads the tab-separated ``extracted_results`` table (columns ``rt``,
    ``int``, ``filename``, ``query`` and ``full_ccms_path`` are used) and
    writes into ``output_folder``:

    * ``merged.png``: all traces in one plot, coloured by ``full_ccms_path``.
    * ``<filename>_<query>.png``: one plot per (filename, query) pair that
      actually occurs in the table.
    """
    parser = argparse.ArgumentParser(description='Drawing XIC extraction results')
    parser.add_argument('extracted_results', help='extracted_results')
    parser.add_argument('output_folder', help='output_folder')

    args = parser.parse_args()

    # Saving a plot into a missing folder fails, so make sure it exists.
    os.makedirs(args.output_folder, exist_ok=True)

    extraction_df = pd.read_csv(args.extracted_results, sep="\t")

    merged_plot = (
        ggplot(extraction_df, aes(x='rt', y='int', color='full_ccms_path'))
        + geom_line()
        + labs(x='RT', y='Intensity')
        + theme(figure_size=(20, 16))
    )
    merged_plot.save(os.path.join(args.output_folder, "merged.png"), limitsize=False)

    # groupby visits only the (filename, query) pairs present in the table;
    # the former nested loops over all filenames x all queries also tried to
    # plot empty frames for pairs that never occur.
    for (filename, query), filtered_df in extraction_df.groupby(["filename", "query"]):
        output_filename = "{}_{}.png".format(filename, query)
        print("Drawing {} ({} points)".format(output_filename, len(filtered_df)))

        single_plot = (
            ggplot(filtered_df, aes(x='rt', y='int'))
            + geom_line()
            + labs(x='RT', y='Intensity')
            + theme(figure_size=(15, 10))
        )
        single_plot.save(os.path.join(args.output_folder, output_filename), limitsize=False)


if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions xicbatch/tools/xicbatch/ming_fileio_library.py
1 change: 1 addition & 0 deletions xicbatch/tools/xicbatch/ming_proteosafe_library.py
1 change: 1 addition & 0 deletions xicbatch/tools/xicbatch/msaccess
91 changes: 91 additions & 0 deletions xicbatch/xicbatch/binding.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
<?xml version="1.0" encoding="ISO-8859-1" ?>
<!-- Binds every action of the xicbatch flow to a download, a tool or an upload. -->
<binding>
<!-- begin: downloads the "spectra" folder and the "params" file of the task (zip compression). -->
<bind action="begin" type="download">
<url value="{livesearch.download}"/>
<query name="task" valueRef="@task" />
<query name="user" valueRef="@user"/>
<compression type="zip" />
<download port="spectra" type="folder">
<query name="resource" value="spectra"/>
</download>
<download port="flowParams" type="file">
<query name="resource" value="params"/>
</download>
<downloadParams>
<query name="resource" value="params"/>
</downloadParams>

</bind>

<!-- calculate_xic: one tool run producing the three raw result outputs. -->
<bind action="calculate_xic" tool="calculate_xic">
<inputAsRequirement port="workflowParameters" requirement="workflowParameters"/>
<inputAsRequirement port="spectra" requirement="spectra"/>
<productionToOutput port="results" production="results"/>
<productionToOutput port="extraction_results" production="extraction_results"/>
<productionToOutput port="ms2_extraction_results" production="ms2_extraction_results"/>
</bind>

<!-- The three demangle_* actions below all run the same demangle_results tool:
     each maps its own input port onto the tool's "results" requirement and its
     own output port onto the tool's "demangled_results" production. -->
<bind action="demangle_results" tool="demangle_results">
<inputAsRequirement port="workflowParameters" requirement="workflowParameters"/>
<inputAsRequirement port="results" requirement="results"/>
<productionToOutput port="demangled_results" production="demangled_results"/>
</bind>

<bind action="demangle_extraction_results" tool="demangle_results">
<inputAsRequirement port="workflowParameters" requirement="workflowParameters"/>
<inputAsRequirement port="extraction_results" requirement="results"/>
<productionToOutput port="demangled_extraction_results" production="demangled_results"/>
</bind>

<bind action="demangle_ms2_extraction_results" tool="demangle_results">
<inputAsRequirement port="workflowParameters" requirement="workflowParameters"/>
<inputAsRequirement port="ms2_extraction_results" requirement="results"/>
<productionToOutput port="demangled_ms2_extraction_results" production="demangled_results"/>
</bind>



<!-- draw_results: its extraction_results port feeds the tool requirement of the same name. -->
<bind action="draw_results" tool="draw_results">
<inputAsRequirement port="workflowParameters" requirement="workflowParameters"/>
<inputAsRequirement port="extraction_results" requirement="extraction_results"/>
<productionToOutput port="visualized_results" production="visualized_results"/>
</bind>



<!-- end: uploads each port listed below as a folder resource of the same name (zip compression). -->
<bind action="end" type="upload">
<url value="{livesearch.upload}"/>
<query name="task" valueRef="@task"/>
<contentQuery name="content"/>
<compression type="zip"/>

<upload port="results" type="folder">
<query name="resource" value="results"/>
</upload>

<upload port="extraction_results" type="folder">
<query name="resource" value="extraction_results"/>
</upload>

<upload port="ms2_extraction_results" type="folder">
<query name="resource" value="ms2_extraction_results"/>
</upload>

<upload port="demangled_results" type="folder">
<query name="resource" value="demangled_results"/>
</upload>

<upload port="demangled_extraction_results" type="folder">
<query name="resource" value="demangled_extraction_results"/>
</upload>

<upload port="demangled_ms2_extraction_results" type="folder">
<query name="resource" value="demangled_ms2_extraction_results"/>
</upload>

<upload port="visualized_results" type="folder">
<query name="resource" value="visualized_results"/>
</upload>

</bind>
</binding>
62 changes: 62 additions & 0 deletions xicbatch/xicbatch/flow.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?xml version="1.0" encoding="ISO-8859-1" ?>
<!-- Data-flow graph of the xicbatch workflow: objects and collections are the
     data items, actions consume them on input ports and produce them on output ports. -->
<flow name="xicbatch">
<object name="workflowParameters"/>
<collection name="spectra"/>

<!-- begin: provides the workflow parameters and the spectra collection. -->
<action name="begin">
<output port="flowParams" object="workflowParameters"/>
<output port="spectra" collection="spectra"/>
</action>

<!-- calculate_xic: turns the spectra into the three raw result objects. -->
<object name="results"/>
<object name="extraction_results"/>
<object name="ms2_extraction_results"/>
<action name="calculate_xic">
<input port="workflowParameters" object="workflowParameters"/>
<input port="spectra" collection="spectra"/>
<output port="results" object="results"/>
<output port="extraction_results" object="extraction_results"/>
<output port="ms2_extraction_results" object="ms2_extraction_results"/>
</action>

<!-- One demangle action per raw result object; each produces its own demangled_* object. -->
<object name="demangled_results"/>
<action name="demangle_results">
<input port="workflowParameters" object="workflowParameters"/>
<input port="results" object="results"/>
<output port="demangled_results" object="demangled_results"/>
</action>

<object name="demangled_extraction_results"/>
<action name="demangle_extraction_results">
<input port="workflowParameters" object="workflowParameters"/>
<input port="extraction_results" object="extraction_results"/>
<output port="demangled_extraction_results" object="demangled_extraction_results"/>
</action>

<object name="demangled_ms2_extraction_results"/>
<action name="demangle_ms2_extraction_results">
<input port="workflowParameters" object="workflowParameters"/>
<input port="ms2_extraction_results" object="ms2_extraction_results"/>
<output port="demangled_ms2_extraction_results" object="demangled_ms2_extraction_results"/>
</action>


<!-- draw_results: note that its extraction_results port receives the DEMANGLED
     extraction table, not the raw one. -->
<collection name="visualized_results"/>
<action name="draw_results">
<input port="workflowParameters" object="workflowParameters"/>
<input port="extraction_results" object="demangled_extraction_results"/>
<output port="visualized_results" collection="visualized_results"/>
</action>

<!-- end: receives the raw results, the demangled results and the drawings. -->
<action name="end">
<input port="results" object="results"/>
<input port="extraction_results" object="extraction_results"/>
<input port="ms2_extraction_results" object="ms2_extraction_results"/>

<input port="demangled_results" object="demangled_results"/>
<input port="demangled_extraction_results" object="demangled_extraction_results"/>
<input port="demangled_ms2_extraction_results" object="demangled_ms2_extraction_results"/>

<input port="visualized_results" collection="visualized_results"/>
</action>
</flow>
Loading