Skip to content

Commit f40428b

Browse files
author
Julianus Pfeuffer
committed
Added nb for featureXML
1 parent 138b49b commit f40428b

File tree

2 files changed

+576
-1
lines changed

2 files changed

+576
-1
lines changed

PyopenmsPandas.ipynb

+451-1
Large diffs are not rendered by default.

featureXML.ipynb

+125
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
{
2+
"cells": [
3+
{
4+
"metadata": {
5+
"trusted": true
6+
},
7+
"cell_type": "code",
8+
"source": [
9+
"from collections import defaultdict\n",
10+
"from functools import reduce\n",
11+
"from pathlib import Path\n",
12+
"from time import perf_counter\n",
13+
"import sys\n",
14+
"\n",
15+
"from IPython.core.display import display\n",
16+
"from pandas import CategoricalDtype\n",
17+
"import numpy as np\n",
18+
"from pyopenms import *\n",
19+
"import pandas as pd\n",
20+
"import os"
21+
],
22+
"execution_count": 24,
23+
"outputs": []
24+
},
25+
{
26+
"metadata": {
27+
"trusted": true
28+
},
29+
"cell_type": "code",
30+
"source": [
class FeatureMapDF(FeatureMap):
    """A ``FeatureMap`` that can export its features as a pandas DataFrame."""

    def __init__(self):
        super().__init__()

    def get_df(self):
        """Return one row per feature as a DataFrame indexed by the feature's unique id.

        Columns:
            sequence  -- sequence string of the first hit of the feature's first
                         peptide identification, or a real ``None`` when the
                         feature has no identification or the identification
                         has no hits (object dtype, never truncated)
            charge    -- ``f.getCharge()`` as int32
            RT        -- ``f.getRT()`` as float64
            mz        -- ``f.getMZ()`` as float64
            quality   -- ``f.getOverallQuality()`` as float32
            intensity -- ``f.getIntensity()`` as float32

        Returns:
            pandas.DataFrame with a uint64 index named ``id``. An empty map
            yields an empty frame with the same columns.
        """

        def first_hit_sequence(feature):
            # Only the first peptide identification and its first hit are
            # consulted, matching how the table has always been built.
            pep_ids = feature.getPeptideIdentifications()
            if len(pep_ids) == 0:
                return None
            hits = pep_ids[0].getHits()
            if len(hits) == 0:
                return None
            return hits[0].getSequence().toString()

        # TODO what else should be exported per feature
        records = [
            (
                f.getUniqueId(),
                first_hit_sequence(f),
                f.getCharge(),
                f.getRT(),
                f.getMZ(),
                f.getOverallQuality(),
                f.getIntensity(),
            )
            for f in self
        ]

        columns = ['id', 'sequence', 'charge', 'RT', 'mz', 'quality', 'intensity']
        table = pd.DataFrame.from_records(records, columns=columns)

        # 'sequence' is deliberately left as object dtype: a fixed-width NumPy
        # string field would turn a missing sequence into the text "None" and
        # cut off long sequences. RT and mz use float64 because float32 keeps
        # only ~7 significant digits, which is not enough for m/z values.
        table = table.astype({
            'id': 'uint64',
            'charge': 'int32',
            'RT': 'float64',
            'mz': 'float64',
            'quality': 'float32',
            'intensity': 'float32',
        })
        return table.set_index('id')
60+
],
61+
"execution_count": 25,
62+
"outputs": []
63+
},
64+
{
65+
"metadata": {
66+
"trusted": true
67+
},
68+
"cell_type": "code",
69+
"source": [
from urllib.request import urlretrieve

# Example data from the OpenMS repository.
# NOTE(review): the URL tracks the moving `develop` branch, so the file
# contents may change over time — consider pinning a tag or commit.
FEATURE_XML_URL = "https://raw.githubusercontent.com/OpenMS/OpenMS/develop/share/OpenMS/examples/FRACTIONS/BSA1_F1.featureXML"
FEATURE_XML_PATH = Path("BSA1.featureXML")

# Download only when the file is not already present so that re-running the
# cell (or "Run All") is idempotent and does not need the network every time.
if not FEATURE_XML_PATH.exists():
    # Fetch into a temporary name first: an interrupted download must not
    # leave a truncated file that later runs would mistake for a valid one.
    partial_path = FEATURE_XML_PATH.with_name(FEATURE_XML_PATH.name + ".part")
    urlretrieve(FEATURE_XML_URL, str(partial_path))
    partial_path.replace(FEATURE_XML_PATH)

# Load the featureXML file into a fresh DataFrame-capable feature map.
fmap = FeatureMapDF()
FeatureXMLFile().load(str(FEATURE_XML_PATH), fmap)
76+
],
77+
"execution_count": 26,
78+
"outputs": []
79+
},
80+
{
81+
"metadata": {
82+
"trusted": true,
83+
"pycharm": {
84+
"name": "#%%\n"
85+
}
86+
},
87+
"cell_type": "code",
88+
"source": [
# Convert the loaded feature map to a DataFrame and show it via IPython's display().
display(fmap.get_df())
90+
],
91+
"execution_count": 27,
92+
"outputs": [
93+
{
94+
"data": {
95+
"text/plain": " sequence charge RT mz quality \\\nid \n9650885788371886430 None 2 1942.600098 395.239288 0.808494 \n18416216708636999474 None 2 1749.138306 443.711212 0.893553 \n4391849314104099247 None 3 1850.058960 325.491180 0.788273 \n8826587690601962141 None 1 1977.238281 388.253906 0.844645 \n446423704489322421 None 2 1901.577637 738.311035 0.770521 \n... ... ... ... ... ... \n10302832019150983352 None 3 1606.777588 426.142517 0.838608 \n8874676592351452214 None 3 1531.052490 403.509857 0.806600 \n2099235613780550198 None 1 1679.266113 344.091949 0.750061 \n16983971133852021113 None 1 1542.787109 330.191254 0.742557 \n11071931317879296440 None 2 1658.548828 485.347015 0.842046 \n\n intensity \nid \n9650885788371886430 1.575720e+08 \n18416216708636999474 5.406930e+07 \n4391849314104099247 5.271700e+07 \n8826587690601962141 3.802820e+07 \n446423704489322421 3.498880e+07 \n... ... \n10302832019150983352 3.678150e+04 \n8874676592351452214 3.128000e+04 \n2099235613780550198 3.097710e+04 \n16983971133852021113 2.789520e+04 \n11071931317879296440 2.438280e+04 \n\n[256 rows x 6 columns]",
96+
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>sequence</th>\n <th>charge</th>\n <th>RT</th>\n <th>mz</th>\n <th>quality</th>\n <th>intensity</th>\n </tr>\n <tr>\n <th>id</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>9650885788371886430</th>\n <td>None</td>\n <td>2</td>\n <td>1942.600098</td>\n <td>395.239288</td>\n <td>0.808494</td>\n <td>1.575720e+08</td>\n </tr>\n <tr>\n <th>18416216708636999474</th>\n <td>None</td>\n <td>2</td>\n <td>1749.138306</td>\n <td>443.711212</td>\n <td>0.893553</td>\n <td>5.406930e+07</td>\n </tr>\n <tr>\n <th>4391849314104099247</th>\n <td>None</td>\n <td>3</td>\n <td>1850.058960</td>\n <td>325.491180</td>\n <td>0.788273</td>\n <td>5.271700e+07</td>\n </tr>\n <tr>\n <th>8826587690601962141</th>\n <td>None</td>\n <td>1</td>\n <td>1977.238281</td>\n <td>388.253906</td>\n <td>0.844645</td>\n <td>3.802820e+07</td>\n </tr>\n <tr>\n <th>446423704489322421</th>\n <td>None</td>\n <td>2</td>\n <td>1901.577637</td>\n <td>738.311035</td>\n <td>0.770521</td>\n <td>3.498880e+07</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>10302832019150983352</th>\n <td>None</td>\n <td>3</td>\n <td>1606.777588</td>\n <td>426.142517</td>\n <td>0.838608</td>\n <td>3.678150e+04</td>\n </tr>\n <tr>\n <th>8874676592351452214</th>\n <td>None</td>\n <td>3</td>\n <td>1531.052490</td>\n <td>403.509857</td>\n <td>0.806600</td>\n <td>3.128000e+04</td>\n </tr>\n <tr>\n <th>2099235613780550198</th>\n <td>None</td>\n <td>1</td>\n <td>1679.266113</td>\n <td>344.091949</td>\n <td>0.750061</td>\n <td>3.097710e+04</td>\n </tr>\n 
<tr>\n <th>16983971133852021113</th>\n <td>None</td>\n <td>1</td>\n <td>1542.787109</td>\n <td>330.191254</td>\n <td>0.742557</td>\n <td>2.789520e+04</td>\n </tr>\n <tr>\n <th>11071931317879296440</th>\n <td>None</td>\n <td>2</td>\n <td>1658.548828</td>\n <td>485.347015</td>\n <td>0.842046</td>\n <td>2.438280e+04</td>\n </tr>\n </tbody>\n</table>\n<p>256 rows × 6 columns</p>\n</div>"
97+
},
98+
"metadata": {},
99+
"output_type": "display_data"
100+
}
101+
]
102+
}
103+
],
104+
"metadata": {
105+
"kernelspec": {
106+
"name": "python3",
107+
"display_name": "Python 3",
108+
"language": "python"
109+
},
110+
"language_info": {
111+
"name": "python",
112+
"version": "3.7.10",
113+
"mimetype": "text/x-python",
114+
"codemirror_mode": {
115+
"name": "ipython",
116+
"version": 3
117+
},
118+
"pygments_lexer": "ipython3",
119+
"nbconvert_exporter": "python",
120+
"file_extension": ".py"
121+
}
122+
},
123+
"nbformat": 4,
124+
"nbformat_minor": 1
125+
}

0 commit comments

Comments
 (0)