Skip to content

Commit 7e6984b

Browse files
author
Corentin
committed
start work ontology import
1 parent 38560a3 commit 7e6984b

File tree

4 files changed

+298
-4
lines changed

4 files changed

+298
-4
lines changed

.gitignore

+4-1
Original file line numberDiff line numberDiff line change
@@ -178,4 +178,7 @@ docker/run.sh
178178
IMPatienT
179179
!data/images/demo_patient
180180
.idea
181-
.ruff_cache
181+
.ruff_cache
182+
data/backup/*
183+
notebooks/*
184+
!notebooks/*.ipynb

notebooks/import_ontology.ipynb

+211
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 23,
6+
"metadata": {},
7+
"outputs": [
8+
{
9+
"ename": "TypeError",
10+
"evalue": "dumps() got multiple values for argument 'format'",
11+
"output_type": "error",
12+
"traceback": [
13+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
14+
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
15+
"Cell \u001b[0;32mIn[23], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpronto\u001b[39;00m \u001b[39mimport\u001b[39;00m Ontology\n\u001b[1;32m 2\u001b[0m go \u001b[39m=\u001b[39m Ontology(\u001b[39m\"\u001b[39m\u001b[39mgoslim_agr.obo\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m go_json \u001b[39m=\u001b[39m go\u001b[39m.\u001b[39;49mdumps(f, \u001b[39mformat\u001b[39;49m\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mjson\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n",
16+
"\u001b[0;31mTypeError\u001b[0m: dumps() got multiple values for argument 'format'"
17+
]
18+
}
19+
],
20+
"source": [
21+
"from pronto import Ontology\n",
22+
"go = Ontology(\"goslim_agr.obo\")\n",
23+
"go"
24+
]
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": 12,
29+
"metadata": {},
30+
"outputs": [],
31+
"source": [
32+
"with open(\"ms.json\", \"wb\") as f:\n",
33+
" go.dump(f, format=\"json\")"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": 33,
39+
"metadata": {},
40+
"outputs": [
41+
{
42+
"data": {
43+
"text/plain": [
44+
"dict_keys(['nodes', 'edges', 'id', 'lbl', 'meta', 'equivalentNodesSets', 'logicalDefinitionAxioms', 'domainRangeAxioms', 'propertyChainAxioms'])"
45+
]
46+
},
47+
"execution_count": 33,
48+
"metadata": {},
49+
"output_type": "execute_result"
50+
}
51+
],
52+
"source": [
53+
"go[\"graphs\"][0].keys()"
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"execution_count": 29,
59+
"metadata": {},
60+
"outputs": [],
61+
"source": [
62+
"import json\n",
63+
"with open(\"ms.json\", \"r\") as f:\n",
64+
" go = json.load(f)"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": 36,
70+
"metadata": {},
71+
"outputs": [
72+
{
73+
"data": {
74+
"text/plain": [
75+
"{'definition': None,\n",
76+
" 'comments': [],\n",
77+
" 'subsets': ['chebi_ph7_3',\n",
78+
" 'gocheck_do_not_annotate',\n",
79+
" 'gocheck_do_not_manually_annotate',\n",
80+
" 'goslim_agr',\n",
81+
" 'goslim_aspergillus',\n",
82+
" 'goslim_candida',\n",
83+
" 'goslim_chembl',\n",
84+
" 'goslim_drosophila',\n",
85+
" 'goslim_flybase_ribbon',\n",
86+
" 'goslim_generic',\n",
87+
" 'goslim_metagenomics',\n",
88+
" 'goslim_mouse',\n",
89+
" 'goslim_pir',\n",
90+
" 'goslim_plant',\n",
91+
" 'goslim_pombe',\n",
92+
" 'goslim_synapse',\n",
93+
" 'goslim_yeast',\n",
94+
" 'prokaryote_subset'],\n",
95+
" 'xrefs': [],\n",
96+
" 'synonyms': [],\n",
97+
" 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#hasOBOFormatVersion',\n",
98+
" 'val': '1.2',\n",
99+
" 'xrefs': [],\n",
100+
" 'meta': None},\n",
101+
" {'pred': 'http://purl.obolibrary.org/obo/owl_versionInfo',\n",
102+
" 'val': '2023-07-27',\n",
103+
" 'xrefs': [],\n",
104+
" 'meta': None}],\n",
105+
" 'version': 'http://purl.obolibrary.org/obo/go/subsets/goslim_agr/go/2023-07-27/subsets/goslim_agr.owl/go/subsets/goslim_agr.owl',\n",
106+
" 'deprecated': False}"
107+
]
108+
},
109+
"execution_count": 36,
110+
"metadata": {},
111+
"output_type": "execute_result"
112+
}
113+
],
114+
"source": [
115+
"go[\"graphs\"][0][\"meta\"]"
116+
]
117+
},
118+
{
119+
"cell_type": "code",
120+
"execution_count": 26,
121+
"metadata": {},
122+
"outputs": [
123+
{
124+
"name": "stdout",
125+
"output_type": "stream",
126+
"text": [
127+
"Term('GO:0000003', name='reproduction')\n",
128+
"Term('GO:0002376', name='immune system process')\n",
129+
"Term('GO:0003677', name='DNA binding')\n",
130+
"Term('GO:0003700', name='DNA-binding transcription factor activity')\n",
131+
"Term('GO:0003723', name='RNA binding')\n",
132+
"Term('GO:0003824', name='catalytic activity')\n",
133+
"Term('GO:0005102', name='signaling receptor binding')\n",
134+
"Term('GO:0005198', name='structural molecule activity')\n",
135+
"Term('GO:0005215', name='transporter activity')\n",
136+
"Term('GO:0005576', name='extracellular region')\n",
137+
"Term('GO:0005634', name='nucleus')\n",
138+
"Term('GO:0005694', name='chromosome')\n",
139+
"Term('GO:0005739', name='mitochondrion')\n",
140+
"Term('GO:0005768', name='endosome')\n",
141+
"Term('GO:0005773', name='vacuole')\n",
142+
"Term('GO:0005783', name='endoplasmic reticulum')\n",
143+
"Term('GO:0005794', name='Golgi apparatus')\n",
144+
"Term('GO:0005829', name='cytosol')\n",
145+
"Term('GO:0005856', name='cytoskeleton')\n",
146+
"Term('GO:0005886', name='plasma membrane')\n",
147+
"Term('GO:0005975', name='carbohydrate metabolic process')\n",
148+
"Term('GO:0006259', name='DNA metabolic process')\n",
149+
"Term('GO:0006629', name='lipid metabolic process')\n",
150+
"Term('GO:0007049', name='cell cycle')\n",
151+
"Term('GO:0007610', name='behavior')\n",
152+
"Term('GO:0008092', name='cytoskeletal protein binding')\n",
153+
"Term('GO:0008134', name='transcription factor binding')\n",
154+
"Term('GO:0008283', name='cell population proliferation')\n",
155+
"Term('GO:0008289', name='lipid binding')\n",
156+
"Term('GO:0009056', name='catabolic process')\n",
157+
"Term('GO:0012501', name='programmed cell death')\n",
158+
"Term('GO:0016043', name='cellular component organization')\n",
159+
"Term('GO:0016070', name='RNA metabolic process')\n",
160+
"Term('GO:0019538', name='protein metabolic process')\n",
161+
"Term('GO:0023052', name='signaling')\n",
162+
"Term('GO:0030054', name='cell junction')\n",
163+
"Term('GO:0030154', name='cell differentiation')\n",
164+
"Term('GO:0030234', name='enzyme regulator activity')\n",
165+
"Term('GO:0030246', name='carbohydrate binding')\n",
166+
"Term('GO:0031410', name='cytoplasmic vesicle')\n",
167+
"Term('GO:0032502', name='developmental process')\n",
168+
"Term('GO:0032991', name='protein-containing complex')\n",
169+
"Term('GO:0036094', name='small molecule binding')\n",
170+
"Term('GO:0038023', name='signaling receptor activity')\n",
171+
"Term('GO:0042592', name='homeostatic process')\n",
172+
"Term('GO:0042995', name='cell projection')\n",
173+
"Term('GO:0045202', name='synapse')\n",
174+
"Term('GO:0050877', name='nervous system process')\n",
175+
"Term('GO:0050896', name='response to stimulus')\n",
176+
"Term('GO:0051234', name='establishment of localization')\n",
177+
"Term('GO:0097367', name='carbohydrate derivative binding')\n",
178+
"Term('GO:1901135', name='carbohydrate derivative metabolic process')\n",
179+
"Term('GO:0046872', name='metal ion binding')\n"
180+
]
181+
}
182+
],
183+
"source": [
184+
"for terms in go.terms():\n",
185+
" print(terms)"
186+
]
187+
}
188+
],
189+
"metadata": {
190+
"kernelspec": {
191+
"display_name": ".venv",
192+
"language": "python",
193+
"name": "python3"
194+
},
195+
"language_info": {
196+
"codemirror_mode": {
197+
"name": "ipython",
198+
"version": 3
199+
},
200+
"file_extension": ".py",
201+
"mimetype": "text/x-python",
202+
"name": "python",
203+
"nbconvert_exporter": "python",
204+
"pygments_lexer": "ipython3",
205+
"version": "3.8.16"
206+
},
207+
"orig_nbformat": 4
208+
},
209+
"nbformat": 4,
210+
"nbformat_minor": 2
211+
}

poetry.lock

+82-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ fr_core_news_sm = { url = "https://github.com/explosion/spacy-models/releases/do
4646
Flask-Cors = "^3.0.10"
4747
textacy = "^0.12.0"
4848
bleach = "^5.0.1"
49+
pronto = "^2.5.5"
4950

5051
[tool.poetry.group.dev.dependencies]
5152
ruff = "^0.0.221"

0 commit comments

Comments
 (0)