1
+ {
2
+ "cells" : [
3
+ {
4
+ "metadata" : {
5
+ "trusted" : true
6
+ },
7
+ "cell_type" : " code" ,
8
+ "source" : [
9
+ " from collections import defaultdict\n " ,
10
+ " from functools import reduce\n " ,
11
+ " from pathlib import Path\n " ,
12
+ " from time import perf_counter\n " ,
13
+ " import sys\n " ,
14
+ " \n " ,
15
+ " from IPython.core.display import display\n " ,
16
+ " from pandas import CategoricalDtype\n " ,
17
+ " import numpy as np\n " ,
18
+ " from pyopenms import *\n " ,
19
+ " import pandas as pd\n " ,
20
+ " import os"
21
+ ],
22
+ "execution_count" : 24 ,
23
+ "outputs" : []
24
+ },
25
+ {
26
+ "metadata" : {
27
+ "trusted" : true
28
+ },
29
+ "cell_type" : " code" ,
30
+ "source" : [
31
class FeatureMapDF(FeatureMap):
    """FeatureMap that can export its features as a pandas DataFrame."""

    def __init__(self):
        super().__init__()

    def get_df(self):
        """Return one row per feature in a DataFrame indexed by the feature's unique id.

        Columns are ``sequence``, ``charge``, ``RT``, ``mz``, ``quality`` and
        ``intensity``. ``sequence`` is taken from the first hit of the first
        peptide identification attached to the feature; when the feature has
        no identification or no hit, ``None`` is supplied instead.
        """
        def first_hit_sequence(feature: Feature):
            """Sequence string of the feature's first peptide hit, or None if there is none."""
            # subfeatures = feature.getFeatureList()  # type: list[FeatureHandle]
            peptide_ids = feature.getPeptideIdentifications()  # type: list[PeptideIdentification]
            if len(peptide_ids) == 0:
                return None
            hits = peptide_ids[0].getHits()
            if len(hits) == 0:
                return None
            first_hit = hits[0]  # type: PeptideHit
            return first_hit.getSequence().toString()

        def records(feature_map: FeatureMap):
            """Yield one metadata tuple per feature, in the field order of ``record_dtype``."""
            for feature in feature_map:
                sequence = first_hit_sequence(feature)
                # TODO what else
                yield (
                    feature.getUniqueId(),
                    sequence,
                    feature.getCharge(),
                    feature.getRT(),
                    feature.getMZ(),
                    feature.getOverallQuality(),
                    feature.getIntensity(),
                )

        n_features = self.size()

        # Structured dtype: one named field per output column.
        # NOTE(review): a None sequence placed into the fixed-width 'U200' field is
        # presumably stored as the text 'None' rather than as a missing value — confirm.
        record_dtype = [
            ('id', np.dtype('uint64')),
            ('sequence', 'U200'),
            ('charge', 'i4'),
            ('RT', 'f'),
            ('mz', 'f'),
            ('quality', 'f'),
            ('intensity', 'f'),
        ]
        # Passing the known count lets NumPy allocate the result array once.
        table = np.fromiter(iter=records(self), dtype=record_dtype, count=n_features)
        return pd.DataFrame(table).set_index('id')
60
+ ],
61
+ "execution_count" : 25 ,
62
+ "outputs" : []
63
+ },
64
+ {
65
+ "metadata" : {
66
+ "trusted" : true
67
+ },
68
+ "cell_type" : " code" ,
69
+ "source" : [
70
from urllib.request import urlretrieve

# Example featureXML file from the OpenMS repository and the local file name it is saved under.
FEATURE_XML_URL = "https://raw.githubusercontent.com/OpenMS/OpenMS/develop/share/OpenMS/examples/FRACTIONS/BSA1_F1.featureXML"
FEATURE_XML_PATH = Path("BSA1.featureXML")

# Download only when no local copy exists, so re-running the notebook
# does not fetch the same file from the network again.
if not FEATURE_XML_PATH.exists():
    urlretrieve(FEATURE_XML_URL, str(FEATURE_XML_PATH))

# Load the features into the DataFrame-capable map defined above.
fmap = FeatureMapDF()
FeatureXMLFile().load(str(FEATURE_XML_PATH), fmap)
75
+ " "
76
+ ],
77
+ "execution_count" : 26 ,
78
+ "outputs" : []
79
+ },
80
+ {
81
+ "metadata" : {
82
+ "trusted" : true ,
83
+ "pycharm" : {
84
+ "name" : " #%%\n "
85
+ }
86
+ },
87
+ "cell_type" : " code" ,
88
+ "source" : [
89
# Convert the loaded feature map to a DataFrame and render it with the notebook's rich display.
feature_table = fmap.get_df()
display(feature_table)
90
+ ],
91
+ "execution_count" : 27 ,
92
+ "outputs" : [
93
+ {
94
+ "data" : {
95
+ "text/plain": " sequence charge RT mz quality \\\nid \n9650885788371886430 None 2 1942.600098 395.239288 0.808494 \n18416216708636999474 None 2 1749.138306 443.711212 0.893553 \n4391849314104099247 None 3 1850.058960 325.491180 0.788273 \n8826587690601962141 None 1 1977.238281 388.253906 0.844645 \n446423704489322421 None 2 1901.577637 738.311035 0.770521 \n... ... ... ... ... ... \n10302832019150983352 None 3 1606.777588 426.142517 0.838608 \n8874676592351452214 None 3 1531.052490 403.509857 0.806600 \n2099235613780550198 None 1 1679.266113 344.091949 0.750061 \n16983971133852021113 None 1 1542.787109 330.191254 0.742557 \n11071931317879296440 None 2 1658.548828 485.347015 0.842046 \n\n intensity \nid \n9650885788371886430 1.575720e+08 \n18416216708636999474 5.406930e+07 \n4391849314104099247 5.271700e+07 \n8826587690601962141 3.802820e+07 \n446423704489322421 3.498880e+07 \n... ... \n10302832019150983352 3.678150e+04 \n8874676592351452214 3.128000e+04 \n2099235613780550198 3.097710e+04 \n16983971133852021113 2.789520e+04 \n11071931317879296440 2.438280e+04 \n\n[256 rows x 6 columns]",
96
+ "text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>sequence</th>\n <th>charge</th>\n <th>RT</th>\n <th>mz</th>\n <th>quality</th>\n <th>intensity</th>\n </tr>\n <tr>\n <th>id</th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>9650885788371886430</th>\n <td>None</td>\n <td>2</td>\n <td>1942.600098</td>\n <td>395.239288</td>\n <td>0.808494</td>\n <td>1.575720e+08</td>\n </tr>\n <tr>\n <th>18416216708636999474</th>\n <td>None</td>\n <td>2</td>\n <td>1749.138306</td>\n <td>443.711212</td>\n <td>0.893553</td>\n <td>5.406930e+07</td>\n </tr>\n <tr>\n <th>4391849314104099247</th>\n <td>None</td>\n <td>3</td>\n <td>1850.058960</td>\n <td>325.491180</td>\n <td>0.788273</td>\n <td>5.271700e+07</td>\n </tr>\n <tr>\n <th>8826587690601962141</th>\n <td>None</td>\n <td>1</td>\n <td>1977.238281</td>\n <td>388.253906</td>\n <td>0.844645</td>\n <td>3.802820e+07</td>\n </tr>\n <tr>\n <th>446423704489322421</th>\n <td>None</td>\n <td>2</td>\n <td>1901.577637</td>\n <td>738.311035</td>\n <td>0.770521</td>\n <td>3.498880e+07</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>10302832019150983352</th>\n <td>None</td>\n <td>3</td>\n <td>1606.777588</td>\n <td>426.142517</td>\n <td>0.838608</td>\n <td>3.678150e+04</td>\n </tr>\n <tr>\n <th>8874676592351452214</th>\n <td>None</td>\n <td>3</td>\n <td>1531.052490</td>\n <td>403.509857</td>\n <td>0.806600</td>\n <td>3.128000e+04</td>\n </tr>\n <tr>\n <th>2099235613780550198</th>\n <td>None</td>\n <td>1</td>\n <td>1679.266113</td>\n <td>344.091949</td>\n <td>0.750061</td>\n <td>3.097710e+04</td>\n </tr>\n 
<tr>\n <th>16983971133852021113</th>\n <td>None</td>\n <td>1</td>\n <td>1542.787109</td>\n <td>330.191254</td>\n <td>0.742557</td>\n <td>2.789520e+04</td>\n </tr>\n <tr>\n <th>11071931317879296440</th>\n <td>None</td>\n <td>2</td>\n <td>1658.548828</td>\n <td>485.347015</td>\n <td>0.842046</td>\n <td>2.438280e+04</td>\n </tr>\n </tbody>\n</table>\n<p>256 rows × 6 columns</p>\n</div>"
97
+ },
98
+ "metadata" : {},
99
+ "output_type" : " display_data"
100
+ }
101
+ ]
102
+ }
103
+ ],
104
+ "metadata" : {
105
+ "kernelspec" : {
106
+ "name" : " python3" ,
107
+ "display_name" : " Python 3" ,
108
+ "language" : " python"
109
+ },
110
+ "language_info" : {
111
+ "name" : " python" ,
112
+ "version" : " 3.7.10" ,
113
+ "mimetype" : " text/x-python" ,
114
+ "codemirror_mode" : {
115
+ "name" : " ipython" ,
116
+ "version" : 3
117
+ },
118
+ "pygments_lexer" : " ipython3" ,
119
+ "nbconvert_exporter" : " python" ,
120
+ "file_extension" : " .py"
121
+ }
122
+ },
123
+ "nbformat" : 4 ,
124
+ "nbformat_minor" : 1
125
+ }
0 commit comments