Skip to content

Commit df99ff3

Browse files
authored
Prototype Implementation - GOEA
Prototype Implementation - GOEA
2 parents 8b560fe + deb091d commit df99ff3

File tree

176 files changed

+75360
-1
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

176 files changed

+75360
-1
lines changed

Dockerfile

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# base image
# Pinned to a fixed release so rebuilds are reproducible; the
# `--break-system-packages` pip flag used below needs the PEP 668-aware pip
# that this release ships.
FROM ubuntu:24.04

# set environment variables
ENV PYTHONUNBUFFERED=1

# set working directory
ENV DockerHOME=/home/app/GOEA
RUN mkdir -p $DockerHOME
WORKDIR $DockerHOME

# install system dependencies (apt lists are removed to keep the layer small)
RUN apt-get update \
    && apt-get install -y python3 python3-pip \
    && rm -rf /var/lib/apt/lists/*

# install Python dependencies
# Only requirements.txt is copied first so this layer stays cached until the
# dependency list itself changes, instead of on every source edit.
COPY requirements.txt $DockerHOME/
RUN pip install --break-system-packages --no-cache-dir -r requirements.txt

# copy source files
COPY . $DockerHOME

# expose port
EXPOSE 8000

# start server
CMD ["python3", "GOEA/manage.py", "runserver", "0.0.0.0:8000"]

GOEA/GOEA/__init__.py

Whitespace-only changes.
171 Bytes
Binary file not shown.
621 Bytes
Binary file not shown.
1.19 KB
Binary file not shown.
2.31 KB
Binary file not shown.
1.09 KB
Binary file not shown.
1.95 KB
Binary file not shown.
556 Bytes
Binary file not shown.

GOEA/GOEA/asgi.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
"""
2+
ASGI config for GOEA project.
3+
4+
It exposes the ASGI callable as a module-level variable named ``application``.
5+
6+
For more information on this file, see
7+
https://docs.djangoproject.com/en/5.0/howto/deployment/asgi/
8+
"""
9+
10+
import os
11+
12+
from django.core.asgi import get_asgi_application
13+
14+
# Name the settings module for Django unless the environment already does so.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "GOEA.settings")

# Module-level ASGI callable; must be created after the settings variable is set.
application = get_asgi_application()

GOEA/GOEA/content/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Ignore everything in this directory
2+
*
3+
# Except this file
4+
!.gitignore

GOEA/GOEA/forms.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from django import forms
2+
from GOEA.models import UploadedFiles
3+
4+
class UploadFilesForm(forms.ModelForm):
    """Model form backed by ``UploadedFiles`` that exposes the upload fields."""

    class Meta:
        """Bind the form to ``UploadedFiles`` and list the fields it renders."""

        model = UploadedFiles
        fields = [
            "xes_file",
            "owl_file",
            "custom_ontology_used",
        ]
8+
9+
class APIKeyForm(forms.Form):
    """Form that asks the user for an OpenAI API key."""

    # The key is a secret, so it is rendered as a password input: the browser
    # masks it while typing and Django does not echo it back into the page
    # when the form is re-displayed.
    key = forms.CharField(
        label='OpenAI API Key',
        max_length=100,
        widget=forms.PasswordInput,
    )
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

GOEA/GOEA/logic/event_abstractor.py

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
# Third-Party Imports (graph handling)
2+
import networkx as nx
3+
4+
# Third-Party Imports
5+
from django.conf import settings
6+
from pyvis.network import Network
7+
from rdflib import Graph, RDFS, OWL
8+
import pm4py
9+
10+
# Local Imports
11+
from GOEA.logic import prompts as p
12+
from GOEA.logic import utils as u
13+
14+
class EventAbstractor:
    """Singleton that abstracts events from a XES file using an ontology file.

    The first construction creates the shared instance; every later
    construction returns that same object and re-runs ``__init__`` with the
    newly supplied paths. ``get_instance()`` returns the shared instance
    without re-initialising it.
    """

    _instance = None

    def __new__(cls, xes_path=None, owl_path=None):
        """Return the shared instance, creating it on first use.

        Both arguments are optional so that ``get_instance()`` can create an
        empty instance; they are consumed by ``__init__``.
        """
        if cls._instance is None:
            instance = super().__new__(cls)
            # Give the getters something to return before any file is loaded.
            instance.xes_df = None
            instance.ontology_graph = None
            cls._instance = instance
        return cls._instance

    def __init__(self, xes_path=None, owl_path=None):
        """Store the paths and, when both are given, load the log and ontology.

        Args:
            xes_path: Path of the XES event log, or ``None``.
            owl_path: Path of the ontology file, or ``None``.
        """
        self.xes_path = xes_path
        self.owl_path = owl_path
        self.data = None
        if xes_path and owl_path:
            self.xes_df = pm4py.read_xes(xes_path)
            self.ontology_graph = self._read_owl_file(owl_path)

    @classmethod
    def get_instance(cls):
        """Return the shared instance, creating an empty one if none exists."""
        if cls._instance is None:
            cls()
        return cls._instance

    def get_xes_df(self):
        """Return the loaded event log, or ``None`` when nothing was loaded."""
        return self.xes_df

    def get_data(self):
        """Return the result of the last ``abstract`` call, or ``None``."""
        return self.data

    def _read_owl_file(self, file_path):
        """Parse ``file_path`` into a new rdflib ``Graph`` and return it."""
        ontology_graph = Graph()
        ontology_graph.parse(file_path)
        return ontology_graph

    def _get_subclasses(self, class_uri):
        """Return the direct ``rdfs:subClassOf`` children of ``class_uri``."""
        return list(self.ontology_graph.subjects(RDFS.subClassOf, class_uri))

    def _get_class_depth(self, class_uri, depth=0, _ancestors=frozenset()):
        """Return the depth of the deepest subclass chain below ``class_uri``.

        Args:
            class_uri: Class whose subtree is measured.
            depth: Depth assigned to ``class_uri`` itself.
            _ancestors: Internal; classes on the path from the starting class
                down to ``class_uri``. A subclass already on that path is
                skipped so a cyclic ``subClassOf`` chain cannot recurse forever.
        """
        path = _ancestors | {class_uri}
        subclasses = [sub for sub in self._get_subclasses(class_uri) if sub not in path]
        if not subclasses:
            return depth
        return max(self._get_class_depth(sub, depth + 1, path) for sub in subclasses)

    def get_max_depth(self):
        """Return the depth of the deepest class chain below ``owl:Thing``."""
        return self._get_class_depth(OWL.Thing)

    def _create_ontology_string(self, class_uri, processed_classes, selected_depth, current_depth=0, indent=""):
        """Render the subtree of ``class_uri`` as numbered, indented text lines.

        Classes above ``selected_depth`` are traversed but not printed.
        Classes at or below it are printed as ``"<indent><depth>. <label>"``,
        with their children indented one extra space.

        Args:
            class_uri: Class to render.
            processed_classes: Set of classes whose children were already
                expanded; updated in place so no class is expanded twice.
            selected_depth: First depth that is printed.
            current_depth: Depth of ``class_uri``.
            indent: Indentation prefix for ``class_uri``'s own line.

        Returns:
            The rendered text, one class per line.
        """
        parts = []
        child_indent = indent
        if current_depth >= selected_depth:
            class_label = self._get_label(class_uri)
            parts.append(indent + f"{current_depth}. " + class_label + "\n")
            child_indent = indent + " "

        # A class reached a second time is still printed (above) but its
        # children are only expanded the first time.
        if class_uri not in processed_classes:
            processed_classes.add(class_uri)
            for subclass in self._get_subclasses(class_uri):
                parts.append(
                    self._create_ontology_string(
                        subclass, processed_classes, selected_depth, current_depth + 1, child_indent
                    )
                )
        return "".join(parts)

    def create_ontology_representation(self, selected_depth):
        """Return the text rendering of the ontology from ``selected_depth`` down."""
        return self._create_ontology_string(OWL.Thing, set(), selected_depth)

    def _create_visualization_graph(self):
        """Build a directed graph of class labels rooted at ``owl:Thing``."""
        visualization_graph = nx.DiGraph()
        self._add_nodes_recursive(visualization_graph, OWL.Thing, 0)
        return visualization_graph

    def _add_nodes_recursive(self, graph, node, depth, _ancestors=frozenset()):
        """Add ``node`` and its subclass subtree to ``graph``.

        Nodes are keyed by label and carry a ``depth`` attribute; an edge
        points from each class to each of its direct subclasses.

        Args:
            graph: Graph receiving the nodes and edges.
            node: Class to add.
            depth: Depth stored on ``node``.
            _ancestors: Internal; classes on the current path. The edge to a
                subclass already on the path is still added, but that
                subclass is not descended into again, so cycles terminate.
        """
        node_label = self._get_label(node)
        graph.add_node(node_label, depth=depth)

        path = _ancestors | {node}
        for subclass in self._get_subclasses(node):
            graph.add_edge(node_label, self._get_label(subclass))
            if subclass not in path:
                self._add_nodes_recursive(graph, subclass, depth + 1, path)

    def _get_label(self, uri):
        """Return the ``rdfs:label`` of ``uri``, or a name derived from the URI.

        Without a label, the text after the last ``/`` is used with
        underscores turned into spaces.
        """
        label = self.ontology_graph.value(uri, RDFS.label)
        if label is None:
            label = uri.split("/")[-1].replace("_", " ")
        return str(label)

    def visualize_graph(self, abstraction_level):
        """Return an HTML page visualising the ontology for ``abstraction_level``.

        Nodes at ``abstraction_level`` are orange, their descendants blue and
        all other nodes grey; edges inside the highlighted subtrees are blue.

        Args:
            abstraction_level: Depth whose nodes are marked as targets.

        Returns:
            The generated HTML, with the pyvis ``utils.js`` reference
            rewritten to the project's static URL.
        """
        visualization_graph = self._create_visualization_graph()
        # "owl#Thing" is the label `_get_label` derives for the unlabelled root.
        nodes_to_add = [node for node in visualization_graph.nodes() if str(node) != "owl#Thing"]

        net = Network(height='800px', width='100%', bgcolor='#ffffff', font_color='black')
        net.from_nx(visualization_graph.subgraph(nodes_to_add))

        marked_nodes = {
            node
            for node, data in visualization_graph.nodes(data=True)
            if data['depth'] == abstraction_level
        }
        descendants = {
            descendant
            for node in marked_nodes
            for descendant in nx.descendants(visualization_graph, node)
        }

        for node in nodes_to_add:
            net_node = net.get_node(node)
            if node in marked_nodes:
                net_node['color'] = '#FF6A00'  # Orange color for target abstraction level
                net_node['size'] = 20
            elif node in descendants:
                net_node['color'] = '#0D6EFD'  # Blue for potential abstraction
            else:
                net_node['color'] = '#808080'  # Grey for not considered nodes

        for edge in net.edges:
            source, target = edge['from'], edge['to']
            if (source in marked_nodes and target in descendants) or (source in descendants and target in descendants):
                edge['color'] = '#0D6EFD'  # Blue for edges of potential abstraction
                edge['width'] = 2.5

        net.repulsion(node_distance=420, central_gravity=0.33, spring_length=110, spring_strength=0.10, damping=0.95)

        html_file = net.generate_html()
        modified_html = html_file.replace('lib/bindings/utils.js', f'{settings.STATIC_URL}js/utils.js')

        return modified_html

    def abstract(self, view, abstraction_level, custom_ontology_used):
        """Extract, normalise and abstract the medication of every event.

        Adds the columns ``medication``, ``normalized_medication`` and
        ``abstracted_medication`` to the loaded event log (in place), stores
        the result in ``self.data`` and returns it.

        Args:
            view: Object whose ``request.session`` receives progress updates,
                or ``None`` to skip progress reporting.
            abstraction_level: Target depth in the ontology.
            custom_ontology_used: Whether the custom-ontology prompt is used.

        Returns:
            The event log data frame with the three added columns.

        Raises:
            RuntimeError: If no event log has been loaded.
        """
        event_log_df = self.xes_df
        if event_log_df is None:
            raise RuntimeError("No event log loaded; create EventAbstractor with a XES and an OWL path first.")

        total_rows = len(event_log_df)
        event_log_df["medication"] = event_log_df.apply(
            lambda row: self._start_extraction_medication(row, view, total_rows), axis=1
        )

        event_log_df["normalized_medication"] = event_log_df.apply(
            lambda row: self._start_normalization_medication(row, view, total_rows), axis=1
        )

        ontology_string = self.create_ontology_representation(abstraction_level)
        event_log_df["abstracted_medication"] = event_log_df.apply(
            lambda row: self._start_medication_abstraction(
                row, ontology_string, abstraction_level, custom_ontology_used, view, total_rows
            ),
            axis=1
        )

        self.data = event_log_df
        return event_log_df

    def _start_extraction_medication(self, row, view, total_rows):
        """Extract the medication from ``row["activity"]`` and report progress."""
        extracted_medication = self._extract_medication(row["activity"])
        # row.name is the row's index label; it is used as the progress step.
        row_number = row.name
        self._update_progress(view, row_number, total_rows, "Extracting Drug or Medicament of Activities")
        return extracted_medication

    def _start_normalization_medication(self, row, view, total_rows):
        """Normalise ``row["medication"]`` (``"N/A"`` passes through) and report progress."""
        normalized_medication = "N/A"
        if row["medication"] != "N/A":
            normalized_medication = self._normalize_medication(row["medication"])
        row_number = row.name
        self._update_progress(view, row_number, total_rows, "Normalizing Drug or Medicament of Extracted Medication")
        return normalized_medication

    def _start_medication_abstraction(self, row, ontology_string, abstraction_level, custom_ontology_used, view, total_rows):
        """Abstract ``row["normalized_medication"]`` (``"N/A"`` passes through) and report progress."""
        abstracted_medication = "N/A"
        medication = row["normalized_medication"]
        if medication != "N/A":
            abstracted_medication = self._abstract_medication(ontology_string, medication, abstraction_level, custom_ontology_used)
        row_number = row.name
        self._update_progress(view, row_number, total_rows, "Abstracting Drug Medicament on Target Abstraction Level")
        return abstracted_medication

    @staticmethod
    def _extract_medication(activity):
        """Send ``activity`` after the extraction prompt and return the reply."""
        # Slice-copy so the shared prompt list is not grown by each call.
        extraction_messages = p.EXTRACTION_MESSAGES[:]
        extraction_messages.append(
            {
                "role": "user",
                "content": activity,
            }
        )
        extracted_medication = u.query_gpt(messages=extraction_messages)
        return extracted_medication

    @staticmethod
    def _normalize_medication(extracted_medication):
        """Send ``extracted_medication`` after the normalisation prompt and return the reply."""
        normalization_messages = p.NORMALIZATION_MESSAGES[:]
        normalization_messages.append(
            {
                "role": "user",
                "content": extracted_medication,
            }
        )
        normalized_medication = u.query_gpt(messages=normalization_messages)
        return normalized_medication

    @staticmethod
    def _abstract_medication(ontology, medication, abstraction_level, custom_ontology_used):
        """Ask for the class of ``medication`` on ``abstraction_level``.

        Args:
            ontology: Text rendering of the ontology used as reference.
            medication: Normalised medication name.
            abstraction_level: Target depth in the ontology.
            custom_ontology_used: Selects the custom-ontology prompt and
                wording when true, the default ones otherwise.

        Returns:
            The reply returned by ``u.query_gpt``.
        """
        if custom_ontology_used:
            abstraction_messages = p.CUSTOM_ABSTRACTION_MESSAGES[:]
            abstraction_messages.extend([
                {
                    "role": "user",
                    "content": (
                        "Here the hierarchy you should use as reference: \n" + ontology +
                        "\n Classify the medication in one of the uppermost classes on the target abstraction level. "
                        "If it does not fit in any classes, return N/A. \n" +
                        "The target abstraction level should be: " + "'" + str(abstraction_level) + ".'"
                    ),
                },
                {
                    "role": "user",
                    "content": "In which category on abstraction level: " + str(abstraction_level) + " would " + medication + " fit in?",
                }
            ])
        else:
            abstraction_messages = p.ABSTRACTION_MESSAGES[:]
            abstraction_messages.extend([
                {
                    "role": "user",
                    "content": (
                        "Here the hierarchy you should use as reference: \n" + ontology +
                        "\n Check if the following medicine is part of the hierarchy and map them to the uppermost class on the target abstraction level. "
                        "If the term is not part of the hierarchy, return N/A. \n" +
                        "The target abstraction level should be: " + "'" + str(abstraction_level) + ".'"
                    ),
                },
                {
                    "role": "user",
                    "content": "What is the uppermost class of " + medication + " which is on the level: " + str(abstraction_level) + "?",
                }
            ])

        abstracted_medication = u.query_gpt(messages=abstraction_messages)
        return abstracted_medication

    def _update_progress(self, view, current_step, total_steps, status):
        """Update the progress of the extraction.

        Writes the rounded percentage and ``status`` into the session of
        ``view.request`` and saves it. Does nothing when ``view`` is ``None``.
        A ``total_steps`` of zero is reported as 0 percent instead of raising
        ``ZeroDivisionError``.
        """
        if view is None:
            return
        percentage = round((current_step / total_steps) * 100) if total_steps else 0
        session = view.request.session
        session["progress"] = percentage
        session["status"] = status
        session.save()

GOEA/GOEA/logic/function_calls.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""Module providing functions for using OpenAI function calling."""
2+
# Schema of one returned table row.
_MEDICATION_ROW_SCHEMA = {
    "type": "string",
    "description": "a row from the table which is related to medications",
}

# Argument object of the function: a single required array named "output".
_EXTRACT_ROWS_PARAMETERS = {
    "type": "object",
    "properties": {
        "output": {
            "type": "array",
            "items": _MEDICATION_ROW_SCHEMA,
        },
    },
    "required": ["output"],
}

# Tool definitions; currently a single function description.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "extract_medication_rows",
            "description": "this function extracts only the relevant rows from a table which are related to medications",
            "parameters": _EXTRACT_ROWS_PARAMETERS,
        },
    },
]

0 commit comments

Comments
 (0)