Skip to content

Commit 00e6f56

Browse files
author
Corentin
committed
ontology conversion class
1 parent fe848a0 commit 00e6f56

File tree

1 file changed

+97
-164
lines changed

1 file changed

+97
-164
lines changed

notebooks/import_ontology.ipynb

Lines changed: 97 additions & 164 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,93 @@
11
{
22
"cells": [
3-
{
4-
"cell_type": "code",
5-
"execution_count": null,
6-
"metadata": {},
7-
"outputs": [],
8-
"source": [
9-
"from pronto import Ontology\n",
10-
"go = Ontology(\"go.obo\")\n",
11-
"go"
12-
]
13-
},
14-
{
15-
"cell_type": "code",
16-
"execution_count": null,
17-
"metadata": {},
18-
"outputs": [],
19-
"source": [
20-
"with open(\"ms.json\", \"wb\") as f:\n",
21-
" go.dump(f, format=\"json\")"
22-
]
23-
},
243
{
254
"cell_type": "code",
265
"execution_count": null,
276
"metadata": {},
287
"outputs": [],
298
"source": [
309
"import json\n",
31-
"with open(\"ms.json\", \"r\") as f:\n",
32-
" go = json.load(f)"
33-
]
34-
},
35-
{
36-
"cell_type": "code",
37-
"execution_count": null,
38-
"metadata": {},
39-
"outputs": [],
40-
"source": [
41-
"go[\"graphs\"][0].keys()"
42-
]
43-
},
44-
{
45-
"cell_type": "code",
46-
"execution_count": null,
47-
"metadata": {},
48-
"outputs": [],
49-
"source": [
50-
"go[\"graphs\"][0][\"nodes\"][0]"
51-
]
52-
},
53-
{
54-
"cell_type": "code",
55-
"execution_count": null,
56-
"metadata": {},
57-
"outputs": [],
58-
"source": [
59-
"edge_dict: dict = {}\n",
60-
"for relationship in go[\"graphs\"][0][\"edges\"]:\n",
61-
" parent_list = edge_dict.get(relationship[\"sub\"].split(\"/\")[-1], [])\n",
62-
" parent_list.append((relationship[\"obj\"].split(\"/\")[-1], relationship[\"pred\"]))\n",
63-
" edge_dict[relationship[\"sub\"].split(\"/\")[-1]] = parent_list"
64-
]
65-
},
66-
{
67-
"cell_type": "code",
68-
"execution_count": null,
69-
"metadata": {},
70-
"outputs": [],
71-
"source": [
72-
"edge_dict"
73-
]
74-
},
75-
{
76-
"cell_type": "code",
77-
"execution_count": null,
78-
"metadata": {},
79-
"outputs": [],
80-
"source": [
81-
"for go_term in go[\"graphs\"][0][\"nodes\"]:\n",
82-
" if go_term[\"type\"] != \"CLASS\":\n",
83-
" print(go_term)"
10+
"import random\n",
11+
"from pronto import Ontology, Definition\n",
12+
"\n",
13+
"class ImpatientVocab():\n",
14+
" def __init__(self) -> None:\n",
15+
" self.used_colors: list[str] = []\n",
16+
" self.impatient_json: list[dict] = []\n",
17+
" self.impatient_onto: Ontology = None\n",
18+
" self.list_of_terms: list[str] = []\n",
19+
"\n",
20+
" def load_json(self, path: str) -> list[dict]:\n",
21+
" self.impatient_json = json.load(open(path, \"r\"))\n",
22+
" return self.impatient_json\n",
23+
" \n",
24+
" def load_ontology(self, path: str) -> Ontology:\n",
25+
" self.impatient_onto = Ontology(path)\n",
26+
" return self.impatient_onto\n",
27+
" \n",
28+
" def json_to_onto(self) -> Ontology:\n",
29+
" self.impatient_onto = Ontology()\n",
30+
" for term in self.impatient_json:\n",
31+
" added_term = self.impatient_onto.create_term(term[\"id\"].replace(\"_\", \":\"))\n",
32+
" added_term.name = term[\"text\"]\n",
33+
" for syn in term[\"data\"][\"synonymes\"].split(\",\"):\n",
34+
" if syn != \"\":\n",
35+
" added_term.add_synonym(syn, scope=\"EXACT\")\n",
36+
" if term[\"data\"][\"description\"] != \"\":\n",
37+
" added_term.definition = Definition(term[\"data\"][\"description\"])\n",
38+
" if term[\"parent\"] != \"#\":\n",
39+
" added_term.superclasses().add(self.impatient_onto[term[\"parent\"].replace(\"_\", \":\")])\n",
40+
" \n",
41+
" self.list_of_terms.append(added_term)\n",
42+
" return self.impatient_onto\n",
43+
" \n",
44+
" def onto_to_json(self) -> list[dict]:\n",
45+
" self.impatient_json = []\n",
46+
" index = 0\n",
47+
" for term in self.impatient_onto.terms():\n",
48+
" relationships = []\n",
49+
" for rel in term.superclasses():\n",
50+
" relationships.append(rel.id)\n",
51+
" relationships.pop(0)\n",
52+
" self.impatient_json.append(\n",
53+
" {\n",
54+
" \"id\": term.id.replace(\"_\", \":\"),\n",
55+
" \"text\": term.name,\n",
56+
" \"icon\": True,\n",
57+
" \"data\": {\n",
58+
" \"description\": term.definition if term.definition is not None else \"\",\n",
59+
" \"synonymes\": \",\".join([syn.description for syn in term.synonyms]),\n",
60+
" \"phenotype_datamined\": \"\",\n",
61+
" \"gene_datamined\": \"\",\n",
62+
" \"alternative_language\": term.name,\n",
63+
" \"correlates_with\": \"\",\n",
64+
" \"image_annotation\": True if index == 0 else False,\n",
65+
" \"hex_color\": self._generate_hex_color(),\n",
66+
" \"hpo_datamined\": \"\",\n",
67+
" },\n",
68+
" \"parent\": relationships[0].replace(\"_\", \":\") if relationships != [] else \"#\"\n",
69+
" }\n",
70+
" )\n",
71+
" index += 1\n",
72+
" return self.impatient_json\n",
73+
" \n",
74+
" def _generate_hex_color(self):\n",
75+
" while True:\n",
76+
" # Generate a random hex color\n",
77+
" color = \"#{:06x}\".format(random.randint(0, 0xFFFFFF))\n",
78+
" # Check if the color has already been used\n",
79+
" if color not in self.used_colors:\n",
80+
" # Add the color to the list of used colors and return it\n",
81+
" self.used_colors.append(color)\n",
82+
" return color\n",
83+
" \n",
84+
" def dump_onto(self, path: str) -> None:\n",
85+
" with open(path, \"wb\") as f:\n",
86+
" self.impatient_onto.dump(f, format=\"obo\")\n",
87+
"\n",
88+
" def dump_json(self, path: str) -> None:\n",
89+
" with open(path, \"w\") as f:\n",
90+
" json.dump(self.impatient_json, f, indent=2)"
8491
]
8592
},
8693
{
@@ -89,101 +96,22 @@
8996
"metadata": {},
9097
"outputs": [],
9198
"source": [
92-
"names: list[str] = []\n",
93-
"id: list[str] = []\n",
94-
"desc: list[str] = []\n",
95-
"synonymes: list[list[str]] = []\n",
96-
"\n",
97-
"for go_term in go[\"graphs\"][0][\"nodes\"]:\n",
98-
" if go_term[\"type\"] == \"CLASS\":\n",
99-
" id.append(go_term[\"id\"].split(\"/\")[-1])\n",
100-
" names.append(go_term[\"lbl\"])\n",
101-
" desc.append(go_term[\"meta\"][\"definition\"][\"val\"])\n",
102-
" synonymes.append([syn[\"val\"] for syn in go_term[\"meta\"][\"synonyms\"]])"
# Round-trip the demo vocabulary: Impatient JSON -> pronto Ontology -> OBO file.
vocab = ImpatientVocab()
vocab.load_json("ontology.json.demo")
vocab.json_to_onto()
vocab.dump_onto("ontology_imp.obo")
103103
]
104104
},
105-
{
106-
"cell_type": "code",
107-
"execution_count": null,
108-
"metadata": {},
109-
"outputs": [],
110-
"source": []
111-
},
112105
{
113106
"cell_type": "code",
114107
"execution_count": null,
115108
"metadata": {},
116109
"outputs": [],
117110
"source": [
118-
"import jsonschema\n",
119-
"from jsonschema import validate\n",
120-
"\n",
121-
"impatient_json: list[dict] = []\n",
122-
"impatient_json_schema = {\n",
123-
" \"type\": \"object\",\n",
124-
" \"properties\": {\n",
125-
" \"id\": {\"type\": \"string\"},\n",
126-
" \"text\": {\"type\": \"string\"},\n",
127-
" \"icon\": {\"type\": \"boolean\"},\n",
128-
" \"data\": {\n",
129-
" \"type\": \"object\",\n",
130-
" \"properties\": {\n",
131-
" \"description\": {\"type\": \"string\"},\n",
132-
" \"synonymes\": {\"type\": \"string\"},\n",
133-
" \"phenotype_datamined\": {\"type\": \"string\"},\n",
134-
" \"gene_datamined\": {\"type\": \"string\"},\n",
135-
" \"alternative_language\": {\"type\": \"string\"},\n",
136-
" \"correlates_with\": {\"type\": \"string\"},\n",
137-
" \"image_annotation\": {\"type\": \"boolean\"},\n",
138-
" \"hex_color\": {\"type\": \"string\", \"pattern\": \"^#[0-9a-fA-F]{6}$\"},\n",
139-
" \"hpo_datamined\": {\"type\": \"string\"},\n",
140-
" },\n",
141-
" \"required\": [\n",
142-
" \"description\",\n",
143-
" \"synonymes\",\n",
144-
" \"phenotype_datamined\",\n",
145-
" \"gene_datamined\",\n",
146-
" \"alternative_language\",\n",
147-
" \"correlates_with\",\n",
148-
" \"image_annotation\",\n",
149-
" \"hex_color\",\n",
150-
" \"hpo_datamined\",\n",
151-
" ],\n",
152-
" },\n",
153-
" \"parent\": {\"type\": \"string\"},\n",
154-
" },\n",
155-
" \"required\": [\"id\", \"text\", \"icon\", \"data\", \"parent\"],\n",
156-
"}\n",
157-
"\n",
158-
"for index in range(len(id)):\n",
159-
" impatient_json.append(\n",
160-
" {\n",
161-
" \"id\": id[index].replace(\"_\", \":\"),\n",
162-
" \"text\": names[index],\n",
163-
" \"icon\": True,\n",
164-
" \"data\": {\n",
165-
" \"description\": desc[index],\n",
166-
" \"synonymes\": ','.join(synonymes[index]),\n",
167-
" \"phenotype_datamined\": \"\",\n",
168-
" \"gene_datamined\": \"\",\n",
169-
" \"alternative_language\": names[index],\n",
170-
" \"correlates_with\": \"\",\n",
171-
" \"image_annotation\": True if index==0 else False,\n",
172-
" \"hex_color\": \"#FFFFFF\",\n",
173-
" \"hpo_datamined\": \"\",\n",
174-
" },\n",
175-
" \"parent\": \"#\",\n",
176-
" }\n",
177-
" )\n",
178-
" \n",
179-
"for child, parent in edge_dict.items():\n",
180-
" try:\n",
181-
" index_term = id.index(child)\n",
182-
" except ValueError:\n",
183-
" print(f\"Term {child} not found in the list of terms\")\n",
184-
" continue\n",
185-
"    # Only one parent, so yes, we are losing information.\n",
186-
" impatient_json[index_term][\"parent\"] = parent[0][0].replace(\"_\", \":\")"
# Convert the GO slim (AGR subset) OBO ontology to the Impatient JSON format.
converter = ImpatientVocab()
converter.load_ontology("goslim_agr.obo")
converter.onto_to_json()
converter.dump_json("obo_to_json_GO.json")
187115
]
188116
},
189117
{
@@ -192,7 +120,10 @@
192120
"metadata": {},
193121
"outputs": [],
194122
"source": [
195-
"json.dump(impatient_json, open(\"impatient.json\", \"w\"))"
# Convert the OBO file produced above back to Impatient JSON.
converter = ImpatientVocab()
converter.load_ontology("ontology_imp.obo")
converter.onto_to_json()
converter.dump_json("obo_to_json_IMP.json")
196127
]
197128
},
198129
{
@@ -201,8 +132,10 @@
201132
"metadata": {},
202133
"outputs": [],
203134
"source": [
204-
"for idx, json_data in enumerate(impatient_json, start=1):\n",
205-
" validate(instance=json_data, schema=impatient_json_schema)"
# Convert the Human Phenotype Ontology (OWL) to Impatient JSON.
converter = ImpatientVocab()
converter.load_ontology("hp.owl")
converter.onto_to_json()
converter.dump_json("obo_to_json_HPO.json")
206139
]
207140
}
208141
],

0 commit comments

Comments
 (0)