 {
  "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pronto import Ontology\n",
-    "go = Ontology(\"go.obo\")\n",
-    "go"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with open(\"ms.json\", \"wb\") as f:\n",
-    "    go.dump(f, format=\"json\")"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "import json\n",
-    "with open(\"ms.json\", \"r\") as f:\n",
-    "    go = json.load(f)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "go[\"graphs\"][0].keys()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "go[\"graphs\"][0][\"nodes\"][0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "edge_dict: dict = {}\n",
-    "for relationship in go[\"graphs\"][0][\"edges\"]:\n",
-    "    parent_list = edge_dict.get(relationship[\"sub\"].split(\"/\")[-1], [])\n",
-    "    parent_list.append((relationship[\"obj\"].split(\"/\")[-1], relationship[\"pred\"]))\n",
-    "    edge_dict[relationship[\"sub\"].split(\"/\")[-1]] = parent_list"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "edge_dict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for go_term in go[\"graphs\"][0][\"nodes\"]:\n",
-    "    if go_term[\"type\"] != \"CLASS\":\n",
-    "        print(go_term)"
+    "import random\n",
+    "from pronto import Definition, Ontology, Term\n",
+    "\n",
+    "class ImpatientVocab:\n",
+    "    def __init__(self) -> None:\n",
+    "        self.used_colors: list[str] = []\n",
+    "        self.impatient_json: list[dict] = []\n",
+    "        self.impatient_onto: Ontology | None = None\n",
+    "        self.list_of_terms: list[Term] = []\n",
+    "\n",
+    "    def load_json(self, path: str) -> list[dict]:\n",
+    "        with open(path, \"r\") as f:\n",
+    "            self.impatient_json = json.load(f)\n",
+    "        return self.impatient_json\n",
+    "\n",
+    "    def load_ontology(self, path: str) -> Ontology:\n",
+    "        self.impatient_onto = Ontology(path)\n",
+    "        return self.impatient_onto\n",
+    "\n",
+    "    def json_to_onto(self) -> Ontology:\n",
+    "        # Build a pronto Ontology from the jstree records. Parents are\n",
+    "        # looked up by id, so a parent must occur before its children.\n",
+    "        self.impatient_onto = Ontology()\n",
+    "        for term in self.impatient_json:\n",
+    "            added_term = self.impatient_onto.create_term(term[\"id\"].replace(\"_\", \":\"))\n",
+    "            added_term.name = term[\"text\"]\n",
+    "            for syn in term[\"data\"][\"synonymes\"].split(\",\"):\n",
+    "                if syn != \"\":\n",
+    "                    added_term.add_synonym(syn, scope=\"EXACT\")\n",
+    "            if term[\"data\"][\"description\"] != \"\":\n",
+    "                added_term.definition = Definition(term[\"data\"][\"description\"])\n",
+    "            if term[\"parent\"] != \"#\":\n",
+    "                added_term.superclasses().add(self.impatient_onto[term[\"parent\"].replace(\"_\", \":\")])\n",
+    "            self.list_of_terms.append(added_term)\n",
+    "        return self.impatient_onto\n",
+    "\n",
+    "    def onto_to_json(self) -> list[dict]:\n",
+    "        self.impatient_json = []\n",
+    "        for index, term in enumerate(self.impatient_onto.terms()):\n",
+    "            # superclasses() yields the term itself first; drop it.\n",
+    "            relationships = [rel.id for rel in term.superclasses()]\n",
+    "            relationships.pop(0)\n",
+    "            self.impatient_json.append(\n",
+    "                {\n",
+    "                    \"id\": term.id.replace(\"_\", \":\"),\n",
+    "                    \"text\": term.name,\n",
+    "                    \"icon\": True,\n",
+    "                    \"data\": {\n",
+    "                        \"description\": str(term.definition) if term.definition is not None else \"\",\n",
+    "                        \"synonymes\": \",\".join(syn.description for syn in term.synonyms),\n",
+    "                        \"phenotype_datamined\": \"\",\n",
+    "                        \"gene_datamined\": \"\",\n",
+    "                        \"alternative_language\": term.name,\n",
+    "                        \"correlates_with\": \"\",\n",
+    "                        \"image_annotation\": index == 0,\n",
+    "                        \"hex_color\": self._generate_hex_color(),\n",
+    "                        \"hpo_datamined\": \"\",\n",
+    "                    },\n",
+    "                    # jstree holds a single tree, so only the first\n",
+    "                    # superclass survives as \"parent\".\n",
+    "                    \"parent\": relationships[0].replace(\"_\", \":\") if relationships else \"#\",\n",
+    "                }\n",
+    "            )\n",
+    "        return self.impatient_json\n",
+    "\n",
+    "    def _generate_hex_color(self) -> str:\n",
+    "        # Draw random colors until an unused one comes up, then record it.\n",
+    "        while True:\n",
+    "            color = \"#{:06x}\".format(random.randint(0, 0xFFFFFF))\n",
+    "            if color not in self.used_colors:\n",
+    "                self.used_colors.append(color)\n",
+    "                return color\n",
+    "\n",
+    "    def dump_onto(self, path: str) -> None:\n",
+    "        with open(path, \"wb\") as f:\n",
+    "            self.impatient_onto.dump(f, format=\"obo\")\n",
+    "\n",
+    "    def dump_json(self, path: str) -> None:\n",
+    "        with open(path, \"w\") as f:\n",
+    "            json.dump(self.impatient_json, f, indent=2)"
    ]
   },
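The record layout that load_json() and json_to_onto() consume is not spelled out anywhere in the notebook, so the sketch below is inferred from the fields onto_to_json() writes; the id and wording are hypothetical. Note that json_to_onto() looks parents up by id while it builds the ontology, so a parent record has to appear before any record that names it in "parent".

    # Hypothetical record from a file like "ontology.json.demo"; only the
    # field layout is taken from the class above, the values are invented.
    record = {
        "id": "MYO:0000001",  # "_" is normalised to ":" on load
        "text": "example term",
        "icon": True,
        "data": {
            "description": "free-text definition, may be empty",
            "synonymes": "synonym one,synonym two",  # comma-separated
            "phenotype_datamined": "",
            "gene_datamined": "",
            "alternative_language": "example term",
            "correlates_with": "",
            "image_annotation": False,
            "hex_color": "#a1b2c3",
            "hpo_datamined": "",
        },
        "parent": "#",  # "#" marks a root node
    }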
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "names: list[str] = []\n",
-    "id: list[str] = []\n",
-    "desc: list[str] = []\n",
-    "synonymes: list[list[str]] = []\n",
-    "\n",
-    "for go_term in go[\"graphs\"][0][\"nodes\"]:\n",
-    "    if go_term[\"type\"] == \"CLASS\":\n",
-    "        id.append(go_term[\"id\"].split(\"/\")[-1])\n",
-    "        names.append(go_term[\"lbl\"])\n",
-    "        desc.append(go_term[\"meta\"][\"definition\"][\"val\"])\n",
-    "        synonymes.append([syn[\"val\"] for syn in go_term[\"meta\"][\"synonyms\"]])"
+    "my_onto = ImpatientVocab()\n",
+    "my_onto.load_json(\"ontology.json.demo\")\n",
+    "my_onto.json_to_onto()\n",
+    "my_onto.dump_onto(\"ontology_imp.obo\")"
    ]
   },
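The OBO file written by dump_onto() can be read straight back with pronto to eyeball the result; a minimal sketch, assuming ontology_imp.obo was written by the cell above:

    from pronto import Ontology

    # Reload the freshly dumped OBO file and list its terms.
    onto = Ontology("ontology_imp.obo")
    for term in onto.terms():
        print(term.id, term.name)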
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "import jsonschema\n",
-    "from jsonschema import validate\n",
-    "\n",
-    "impatient_json: list[dict] = []\n",
-    "impatient_json_schema = {\n",
-    "    \"type\": \"object\",\n",
-    "    \"properties\": {\n",
-    "        \"id\": {\"type\": \"string\"},\n",
-    "        \"text\": {\"type\": \"string\"},\n",
-    "        \"icon\": {\"type\": \"boolean\"},\n",
-    "        \"data\": {\n",
-    "            \"type\": \"object\",\n",
-    "            \"properties\": {\n",
-    "                \"description\": {\"type\": \"string\"},\n",
-    "                \"synonymes\": {\"type\": \"string\"},\n",
-    "                \"phenotype_datamined\": {\"type\": \"string\"},\n",
-    "                \"gene_datamined\": {\"type\": \"string\"},\n",
-    "                \"alternative_language\": {\"type\": \"string\"},\n",
-    "                \"correlates_with\": {\"type\": \"string\"},\n",
-    "                \"image_annotation\": {\"type\": \"boolean\"},\n",
-    "                \"hex_color\": {\"type\": \"string\", \"pattern\": \"^#[0-9a-fA-F]{6}$\"},\n",
-    "                \"hpo_datamined\": {\"type\": \"string\"},\n",
-    "            },\n",
-    "            \"required\": [\n",
-    "                \"description\",\n",
-    "                \"synonymes\",\n",
-    "                \"phenotype_datamined\",\n",
-    "                \"gene_datamined\",\n",
-    "                \"alternative_language\",\n",
-    "                \"correlates_with\",\n",
-    "                \"image_annotation\",\n",
-    "                \"hex_color\",\n",
-    "                \"hpo_datamined\",\n",
-    "            ],\n",
-    "        },\n",
-    "        \"parent\": {\"type\": \"string\"},\n",
-    "    },\n",
-    "    \"required\": [\"id\", \"text\", \"icon\", \"data\", \"parent\"],\n",
-    "}\n",
-    "\n",
-    "for index in range(len(id)):\n",
-    "    impatient_json.append(\n",
-    "        {\n",
-    "            \"id\": id[index].replace(\"_\", \":\"),\n",
-    "            \"text\": names[index],\n",
-    "            \"icon\": True,\n",
-    "            \"data\": {\n",
-    "                \"description\": desc[index],\n",
-    "                \"synonymes\": ','.join(synonymes[index]),\n",
-    "                \"phenotype_datamined\": \"\",\n",
-    "                \"gene_datamined\": \"\",\n",
-    "                \"alternative_language\": names[index],\n",
-    "                \"correlates_with\": \"\",\n",
-    "                \"image_annotation\": True if index==0 else False,\n",
-    "                \"hex_color\": \"#FFFFFF\",\n",
-    "                \"hpo_datamined\": \"\",\n",
-    "            },\n",
-    "            \"parent\": \"#\",\n",
-    "        }\n",
-    "    )\n",
-    "\n",
-    "for child, parent in edge_dict.items():\n",
-    "    try:\n",
-    "        index_term = id.index(child)\n",
-    "    except ValueError:\n",
-    "        print(f\"Term {child} not found in the list of terms\")\n",
-    "        continue\n",
-    "    # Only one parent so yeah we are loosing information.\n",
-    "    impatient_json[index_term][\"parent\"] = parent[0][0].replace(\"_\", \":\")"
+    "my_onto = ImpatientVocab()\n",
+    "my_onto.load_ontology(\"goslim_agr.obo\")\n",
+    "my_onto.onto_to_json()\n",
+    "my_onto.dump_json(\"obo_to_json_GO.json\")"
    ]
   },
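Here the GO slim is parsed by pronto straight from OBO and flattened to the jstree JSON; since onto_to_json() keeps only the first superclass of each term as "parent", any multiple inheritance in the slim collapses to a single branch.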
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "json.dump(impatient_json, open(\"impatient.json\", \"w\"))"
+    "my_onto = ImpatientVocab()\n",
+    "my_onto.load_ontology(\"ontology_imp.obo\")\n",
+    "my_onto.onto_to_json()\n",
+    "my_onto.dump_json(\"obo_to_json_IMP.json\")"
    ]
   },
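Since ontology_imp.obo was itself generated from ontology.json.demo earlier in the notebook, this cell closes a JSON → OBO → JSON round trip. A quick consistency check, assuming both files sit in the working directory:

    import json

    # Sketch: compare term ids between the demo JSON and the round-tripped
    # export; demo ids are normalised the same way the class does on load.
    with open("ontology.json.demo") as f:
        before = {item["id"].replace("_", ":") for item in json.load(f)}
    with open("obo_to_json_IMP.json") as f:
        after = {item["id"] for item in json.load(f)}

    print(before == after)  # True if no term id was lost in the round trip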
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "for idx, json_data in enumerate(impatient_json, start=1):\n",
-    "    validate(instance=json_data, schema=impatient_json_schema)"
+    "my_onto = ImpatientVocab()\n",
+    "my_onto.load_ontology(\"hp.owl\")\n",
+    "my_onto.onto_to_json()\n",
+    "my_onto.dump_json(\"obo_to_json_HPO.json\")"
    ]
   }
  ],