From 311df5e75a24c4b9e490a0388ab8aa05d4637ed4 Mon Sep 17 00:00:00 2001
From: Johannes Radebold <johannes.radebold@rwth-aachen.de>
Date: Thu, 9 Nov 2023 17:24:08 +0100
Subject: [PATCH 1/2] Begin working on "show more" function, search
 suggestions for 3 <= input characters

---
 app/Entirety/static/js/semantics.js | 96 +++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 5 deletions(-)
diff --git a/app/Entirety/static/js/semantics.js b/app/Entirety/static/js/semantics.js
index d7d24a16..b0f66202 100644
--- a/app/Entirety/static/js/semantics.js
+++ b/app/Entirety/static/js/semantics.js
@@ -483,6 +483,7 @@ function autoComplete(event) {
     var currentFocus;
     event.addEventListener("input", function (e) {
 
+
         var selectedOption = document.querySelector('.form-select-sm[name="searchOptions"] option:checked');
         var selectedValue = selectedOption.getAttribute('data-value');
         var arr = getData(selectedValue)
@@ -492,6 +493,11 @@ function autoComplete(event) {
         if (!val) {
             return false;
         }
+        if (val.length <= 3) {
+            return false;
+        }
+        console.log("test")
+        console.log(arr.length)
         currentFocus = -1;
         a = document.createElement("DIV")
         a.setAttribute("id", this.id + "autocomplete-list")
@@ -500,8 +506,41 @@ function autoComplete(event) {
 
 
         this.parentNode.appendChild(a)
+        //     var count = 0;
+        //     for (i = 0; i < arr.length; i++) {
+        //         if (arr[i].substr(0, val.length).toUpperCase() === val.toUpperCase()) {
+        //             b = document.createElement("DIV");
+        //             b.innerHTML = "<strong>" + arr[i].substr(0, val.length) + "</strong>";
+        //             b.innerHTML += arr[i].substr(val.length);
+        //             b.innerHTML += "<input type='hidden' value='" + arr[i] + "'>";
+        //             b.addEventListener("click", function (e) {
+        //                 event.value = this.getElementsByTagName("input")[0].value;
+        //                 closeAllLists();
+        //             });
+        //             var container = document.querySelector('.autocomplete-items');
+        //             container.style.width = inputwidth.offsetWidth + 'px';
+        //             a.appendChild(b);
+        //             count++;
+        //         }
+        //
+        //         if (count > 3) {
+        //             var showMore = document.createElement("DIV");
+        //             showMore.innerHTML = "Mehr Ergebnisse anzeigen...";
+        //             showMore.style.color = "blue";
+        //             showMore.style.cursor = "pointer";
+        //             showMore.addEventListener("click", function (e) {
+        //             });
+        //             a.appendChild(showMore);
+        //             break;
+        //         }
+        //     }
+        var count = 0;
+        var index = 0;
         for (i = 0; i < arr.length; i++) {
-            if (arr[i].substr(0, val.length).toUpperCase() == val.toUpperCase()) {
+            if (arr[i].substr(0, val.length).toUpperCase() === val.toUpperCase()) {
+                if (count >= 3) {
+                    break;
+                }
                 b = document.createElement("DIV");
                 b.innerHTML = "<strong>" + arr[i].substr(0, val.length) + "</strong>";
                 b.innerHTML += arr[i].substr(val.length);
@@ -512,21 +551,54 @@ function autoComplete(event) {
                 });
                 var container = document.querySelector('.autocomplete-items');
                 container.style.width = inputwidth.offsetWidth + 'px';
-                console.log(inputwidth.offsetWidth + 'px')
                 a.appendChild(b);
+                count++;
+                index = i;
             }
         }
+        if (index < arr.length - 1) {
+            var showMore = document.createElement("DIV");
+            showMore.innerHTML = "Mehr Ergebnisse anzeigen...";
+            showMore.style.color = "blue";
+            showMore.style.cursor = "pointer";
+            a.appendChild(showMore);
+            showMore.addEventListener("click", function (e) {
+                while (a.firstChild) {
+                    a.removeChild(a.firstChild);
+                }
+                count = 0;
+                for (i = index + 1; i < arr.length && count < 3; i++) {
+                    if (arr[i].substr(0, val.length).toUpperCase() === val.toUpperCase()) {
+                        b = document.createElement("DIV");
+                        b.innerHTML = "<strong>" + arr[i].substr(0, val.length) + "</strong>";
+                        b.innerHTML += arr[i].substr(val.length);
+                        b.innerHTML += "<input type='hidden' value='" + arr[i] + "'>";
+                        b.addEventListener("click", function (e) {
+                            event.value = this.getElementsByTagName("input")[0].value;
+                            closeAllLists();
+                        });
+                        a.appendChild(b);
+                        count++;
+                        index = i;
+                    }
+                }
+                if (index < arr.length - 1) {
+                    a.appendChild(showMore);
+                }
+            });
+        }
     });
+
     event.addEventListener("keydown", function (e) {
         var x = document.getElementById(this.id + "autocomplete-list");
         if (x) x = x.getElementsByTagName("div");
-        if (e.keyCode == 40) {
+        if (e.keyCode === 40) {
             currentFocus++;
             addActive(x);
-        } else if (e.keyCode == 38) {
+        } else if (e.keyCode === 38) {
             currentFocus--;
             addActive(x);
-        } else if (e.keyCode == 13) {
+        } else if (e.keyCode === 13) {
             e.preventDefault();
             if (currentFocus > -1) {
                 if (x) x[currentFocus].click();
@@ -534,6 +606,9 @@ function autoComplete(event) {
         }
     });
 
+    function generateList(arr) {
+    }
+
     function addActive(x) {
         /*a function to classify an item as "active":*/
         if (!x) return false;
@@ -575,6 +650,17 @@ function autoComplete(event) {
         }
     }
 
+    // document.addEventListener("click", function (e) {
+    //     var target = e.target;
+    //     while (target != null) {
+    //         if (target === showMore) {
+    //             return;
+    //         }
+    //         target = target.parentElement;
+    //     }
+    //     closeAllLists(e.target);
+    // });
+
     document.addEventListener("click", function (e) {
         closeAllLists(e.target);
     });

From 221971e53837d5b834b5386d1d414f9ad4b899ca Mon Sep 17 00:00:00 2001
From: Johannes Radebold <johannes.radebold@rwth-aachen.de>
Date: Fri, 24 Nov 2023 21:36:00 +0100
Subject: [PATCH 2/2] New PrepData algorithm: fewer for loops, better overall
 performance

---
 app/Entirety/semantics/prepDataSemantics.py | 356 +++++++++++++-------
 app/Entirety/semantics/views.py             |  63 +---
 2 files changed, 249 insertions(+), 170 deletions(-)

diff --git a/app/Entirety/semantics/prepDataSemantics.py b/app/Entirety/semantics/prepDataSemantics.py
index 5d9216fb..5a381e67 100644
--- a/app/Entirety/semantics/prepDataSemantics.py
+++ b/app/Entirety/semantics/prepDataSemantics.py
@@ -2,135 +2,253 @@
 import json
 from projects.mixins import ProjectContextMixin
 from entities.requests import get_entities_list
+import numpy as np
+from projects.models import Project
+from filip.clients.ngsi_v2.client import ContextBrokerClient
 
 
 class PrepData(ProjectContextMixin):
+    def __init__(self, project: Project):
+        self.project = project
+        self.elements = []
+        self.entity_ids_list = []
+        self.entity_types_list = []
+        self.entity_names_list = []
+        self.rel_names_list = []
+
+
+    def generate_df(self,):
 
-    def generate_df(self):
         """
         Generates a pandas DataFrame and a list of Cytoscape elements from a list of entities.
 
         Returns:
-            list: A list of Cytoscape elements containing information about nodes and edges.
-                Each element is a dictionary with keys 'data' and 'classes', where 'data' is another dictionary
-                containing information about the node or edge, and 'classes' is a string specifying the type of the entity.
+            None
         """
+        # context=super().get_context_data(**kwargs)
+        # print(context)
+        # print(ProjectContextMixin.project)
+        # print(self.project)
         entity_list = get_entities_list(self, ".*", "", self.project)
-        entity_id_list = []
-        entity_type_list = []
-        entity_name_list = []
-        relationship_name_list = []
-        relationship_target_list = []
+        # entity_id_list = []
+        # entity_type_list = []
+        # entity_name_list = []
+        # relationship_name_list = []
+        # relationship_target_list = []
+        entity_relevant_infos_list = []  # contains only information of an entity relevant to build the graph
 
         for entity in entity_list:
             entity_json = json.loads(entity.json())
-
-            entity_id_list.append(entity.id)
-            entity_type_list.append(entity.type)
-
-            all_entity_values = self.all_values(entity_json)
-            entity_name = 'No name Set'  # default value if no "name" attribute is present
-            relationship_target = []
-            relationship_name = []
-
-            for key, value in all_entity_values:
-                if key == 'name.value':
-                    entity_name = value
-                elif value == 'Relationship':
-                    rel_name_str = key.rsplit('.type', 1)[0]
-                    for next_key, next_value in all_entity_values:
-                        if next_key == f"{rel_name_str}.value":
-                            if isinstance(next_value, list):
-                                relationship_target.extend(next_value)
-                                relationship_name.extend([rel_name_str] * len(next_value))
-                            else:
-                                relationship_target.append(next_value)
-                                relationship_name.append(rel_name_str)
-
-            entity_name_list.append(entity_name)
-            relationship_name_list.append(relationship_name)
-            relationship_target_list.append(relationship_target)
-
-        data = {'id': entity_id_list,
-                'type': entity_type_list,
-                'name': entity_name_list,
-                'relationship_name': relationship_name_list,
-                'relationship_with': relationship_target_list
-                }
-        self.df = pd.DataFrame(data, columns=['id', 'type', 'name', 'relationship_name', 'relationship_with'])
-
-        # generate cytoscape elements
-
-        cy_nodes = []
-        cy_edges = []
-        elements = []
-        nodes = set()
-
-        for index, row in self.df.iterrows():
-            source, label, source_type, target, target_label = row['id'], \
-                                                               row['name'], \
-                                                               row['type'], \
-                                                               row['relationship_with'], \
-                                                               row['relationship_name']
-            # finding all parents of 'source'
-            parents = []
-            bool_series = self.df['relationship_with'].apply(lambda
-                                                                 cell: source in cell)  # get pd.Series with all rows wich contains the id marked as True, otherwise False
-            index_list_true = bool_series[bool_series].index  # get index of rows which are true
-            for val in index_list_true:
-                parents.append(self.df.iloc[val, 0])
-            if source not in nodes:
-                nodes.add(source)
-                cy_nodes.append(
-                    {"data": {"id": source, "label": label, "children": target, "parents": parents},
-                     "classes": source_type})
-
-            for i, j in zip(target, target_label):
-                cy_edges.append({"data": {"id": source + i, "source": source, "target": i, "label": j, }})
-        # Check if the target id of an edge has a target Node. If not a node will be added, however ther is no real entity behind it
-        # (necessary, otherwise cytoscape fails)
-        for edge in cy_edges:
-            for key, value in edge.items():
-                if value.get("target") not in nodes:
-                    nodes.add(value.get('target'))
-                    cy_nodes.append(
-                        {"data": {"id": value.get("target"), "label": "This Relationship has no target entity"}})
-
-        for i in cy_nodes:
-            elements.append(i)
-        for j in cy_edges:
-            elements.append(j)
-
-        entity_name_list_unique = list(set(entity_name_list))
-
-        return elements, entity_id_list, entity_name_list_unique
-
-    def types(self):
-        """
-        Creates a list with all possible types for filtering by type.
-        Therefore, this method itterates through the generateted df from prep_data.py.
-        Will be executeted once upon starting the App.
-        :param df: imported df from prep_data.py
-        :return: options (list with all possible types)
-        """
-        options = []
-        for type in self.df['type'].unique():
-            options.append(type)
-        return options
-
-    def relationships(self):
-        """
-        Creates a list with all possible relationships for filtering by relationship.
-        Therefore, this method itterates through the generateted df from prep_data.py.
-        Will be executeted once upon starting the App.
-        :param df:
-        :return: options (list with all possible relationships)
-        """
-        options = []
-        all_rel = []
-        for rel_list in self.df['relationship_name']:
-            for rel in rel_list:
-                if rel not in all_rel:
-                    all_rel.append(rel)
-                    options.append(rel)
-        return options
+            entity_id, entity_typ, entity_name = get_entity_dict(entity)
+            relationship_name_list, relationship_target_list = get_relationships(entity_json)
+            entity_relevant_infos_list.append(
+                [entity_id, entity_typ, entity_name, relationship_name_list, relationship_target_list])
+
+        # Build dataframe
+        entities_df = pd.DataFrame(entity_relevant_infos_list,
+                                   columns=['id', 'type', 'name', 'relationship_name', 'relationship_target'])
+        # Split the list columns of entities_df into new rows
+        entities_exploded_df = entities_df.copy()
+        entities_exploded_df = entities_exploded_df.explode(['relationship_name', 'relationship_target'])
+
+        # Find the parents of each entity in column 'id' and store them in the new column 'parents' of entities_parents_df
+        entities_parents_df = entities_exploded_df.copy()
+        parents_dict = entities_parents_df.groupby('relationship_target')['id'].apply(list).to_dict()
+        entities_parents_df['parents'] = entities_parents_df['id'].apply(
+            lambda row: parents_dict[row] if row in entities_parents_df['relationship_target'].values else np.nan)
+        # Again split the lists in column 'parents' into new rows
+        entities_parents_exploded_df = entities_parents_df.explode('parents')
+        # Now each row represents an individual path: parent -> entity -> child
+
+        # Group by 'id', 'type' and 'name' and collect 'relationship_name', 'relationship_target' and 'parents' into
+        # lists, so that each entity is now represented by one row with its parents and children; then reset the index
+        entities_to_nodes_df = entities_parents_exploded_df.groupby(['id', 'type', 'name']).agg({
+            'relationship_name': list,
+            'relationship_target': list,
+            'parents': list
+        }).reset_index()
+        # Future-proof code: from pandas 2.1.0 onwards .applymap is deprecated; it was renamed to .map
+        # Replace np.nan with []. Up until now, np.nan represented no parents, children, or relationship names.
+        # Empty lists are needed for the node generation and input format of cytoscape.js. np.nan is not accepted
+        if pd.__version__ >= '2.1.0':
+            entities_to_nodes_df = entities_to_nodes_df.map(
+                lambda val: [] if isinstance(val, list) and not pd.notna(val).any() else val)
+        else:
+            entities_to_nodes_df = entities_to_nodes_df.applymap(
+                lambda val: [] if isinstance(val, list) and not pd.notna(val).any() else val)
+
+        # Generate nodes
+        entities_to_nodes_df['node'] = entities_to_nodes_df.apply(generate_nodes, axis=1)
+        nodes_list = entities_to_nodes_df['node'].tolist()
+
+        # Generate edges
+        edges_list = entities_exploded_df.apply(
+            (lambda row: generate_edges(row) if isinstance(row['relationship_target'], str) else np.nan),
+            axis=1).dropna().tolist()
+
+        self.elements = nodes_list + edges_list
+
+        all_entity_ids_unique = (
+                    entities_exploded_df['id'] + entities_exploded_df['relationship_target']).dropna().unique()
+        all_entity_types_unique = entities_exploded_df['type'].dropna().unique()
+        all_entity_names_unique = entities_exploded_df['name'].dropna().unique()
+        all_rel_names_unique = entities_exploded_df['relationship_name'].dropna().unique()
+
+        self.entity_ids_list = all_entity_ids_unique.tolist()
+        self.entity_types_list = all_entity_types_unique.tolist()
+        self.entity_names_list = all_entity_names_unique.tolist()
+        self.rel_names_list = all_rel_names_unique.tolist()
+
+    #     #     entity_id_list.append(entity.id)
+    #     #     entity_type_list.append(entity.type)
+    #     #
+    #     #     all_entity_values = self.all_values(entity_json)
+    #     #     entity_name = 'No name Set'  # default value if no "name" attribute is present
+    #     #     relationship_target = []
+    #     #     relationship_name = []
+    #     #
+    #     #     for key, value in all_entity_values:
+    #     #         if key == 'name.value':
+    #     #             entity_name = value
+    #     #         elif value == 'Relationship':
+    #     #             rel_name_str = key.rsplit('.type', 1)[0]
+    #     #             for next_key, next_value in all_entity_values:
+    #     #                 if next_key == f"{rel_name_str}.value":
+    #     #                     if isinstance(next_value, list):
+    #     #                         relationship_target.extend(next_value)
+    #     #                         relationship_name.extend([rel_name_str] * len(next_value))
+    #     #                     else:
+    #     #                         relationship_target.append(next_value)
+    #     #                         relationship_name.append(rel_name_str)
+    #     #
+    #     #     entity_name_list.append(entity_name)
+    #     #     relationship_name_list.append(relationship_name)
+    #     #     relationship_target_list.append(relationship_target)
+    #     #
+    #     # data = {'id': entity_id_list,
+    #     #         'type': entity_type_list,
+    #     #         'name': entity_name_list,
+    #     #         'relationship_name': relationship_name_list,
+    #     #         'relationship_with': relationship_target_list
+    #     #         }
+    #     # self.df = pd.DataFrame(data, columns=['id', 'type', 'name', 'relationship_name', 'relationship_with'])
+    #
+    #     # generate cytoscape elements
+    #
+    #     cy_nodes = []
+    #     cy_edges = []
+    #     elements = []
+    #     nodes = set()
+    #
+    #     for index, row in self.df.iterrows():
+    #         source, label, source_type, target, target_label = row['id'], \
+    #                                                            row['name'], \
+    #                                                            row['type'], \
+    #                                                            row['relationship_with'], \
+    #                                                            row['relationship_name']
+    #         # finding all parents of 'source'
+    #         parents = []
+    #         bool_series = self.df['relationship_with'].apply(lambda
+    #                                                              cell: source in cell)  # get pd.Series with all rows wich contains the id marked as True, otherwise False
+    #         index_list_true = bool_series[bool_series].index  # get index of rows which are true
+    #         for val in index_list_true:
+    #             parents.append(self.df.iloc[val, 0])
+    #         if source not in nodes:
+    #             nodes.add(source)
+    #             cy_nodes.append(
+    #                 {"data": {"id": source, "label": label, "children": target, "parents": parents},
+    #                  "classes": source_type})
+    #
+    #         for i, j in zip(target, target_label):
+    #             cy_edges.append({"data": {"id": source + i, "source": source, "target": i, "label": j, }})
+    #     # Check if the target id of an edge has a target Node. If not a node will be added, however ther is no real entity behind it
+    #     # (necessary, otherwise cytoscape fails)
+    #     for edge in cy_edges:
+    #         for key, value in edge.items():
+    #             if value.get("target") not in nodes:
+    #                 nodes.add(value.get('target'))
+    #                 cy_nodes.append(
+    #                     {"data": {"id": value.get("target"), "label": "This Relationship has no target entity"}})
+    #
+    #     for i in cy_nodes:
+    #         elements.append(i)
+    #     for j in cy_edges:
+    #         elements.append(j)
+    #
+    #     entity_name_list_unique = list(set(entity_name_list))
+    #
+    #     return elements, entity_id_list, entity_name_list_unique
+    #
+    # def types(self):
+    #     """
+    #     Creates a list with all possible types for filtering by type.
+    #     Therefore, this method itterates through the generateted df from prep_data.py.
+    #     Will be executeted once upon starting the App.
+    #     :param df: imported df from prep_data.py
+    #     :return: options (list with all possible types)
+    #     """
+    #     options = []
+    #     for type in self.df['type'].unique():
+    #         options.append(type)
+    #     return options
+    #
+    # def relationships(self):
+    #     """
+    #     Creates a list with all possible relationships for filtering by relationship.
+    #     Therefore, this method itterates through the generateted df from prep_data.py.
+    #     Will be executeted once upon starting the App.
+    #     :param df:
+    #     :return: options (list with all possible relationships)
+    #     """
+    #     options = []
+    #     all_rel = []
+    #     for rel_list in self.df['relationship_name']:
+    #         for rel in rel_list:
+    #             if rel not in all_rel:
+    #                 all_rel.append(rel)
+    #                 options.append(rel)
+    #     return options
+
+
+def get_entity_dict(entity):
+    return entity.id, entity.type, entity.name.value if entity.name else 'no_name',
+
+
+def get_relationships(entity_dict):
+    relationships_name_list = []
+    relationships_target_list = []
+    for key, value in entity_dict.items():
+        if isinstance(value, dict) and value.get('type') == 'Relationship':
+            relationships = value.get('value')
+            if isinstance(relationships, list):
+                for rel in relationships:
+                    relationships_name_list.append(key)
+                    relationships_target_list.append(rel)
+            else:
+                relationships_name_list.append(key)
+                relationships_target_list.append(relationships)
+
+    return relationships_name_list, relationships_target_list
+
+
+def generate_nodes(row):
+    return {
+        'data': {
+            'id': row['id'],
+            'label': row['name'],
+            'children': row['relationship_target'],
+            'parents': row['parents']
+        },
+        'classes': row['type']
+    }
+
+
+def generate_edges(row):
+    return {
+        'data': {
+            'id': row['id'] + row['relationship_target'],
+            'source': row['id'],
+            'target': row['relationship_target'],
+            'label': row['relationship_name']
+        }
+    }
diff --git a/app/Entirety/semantics/views.py b/app/Entirety/semantics/views.py
index 58d899c3..5b2365e6 100644
--- a/app/Entirety/semantics/views.py
+++ b/app/Entirety/semantics/views.py
@@ -1,12 +1,11 @@
-from django.views.generic import TemplateView
-from django.shortcuts import redirect
-from projects.models import Project
-from django.http import JsonResponse
 import json
+
+from django.http import JsonResponse
+from django.views.generic import TemplateView
+
+from entities.requests import get_entity
 from projects.mixins import ProjectContextMixin
 from semantics.prepDataSemantics import PrepData
-from django.shortcuts import render
-from entities.requests import get_entity
 
 
 class SemanticsVisualizer(ProjectContextMixin, TemplateView):
@@ -14,12 +13,13 @@ class SemanticsVisualizer(ProjectContextMixin, TemplateView):
 
     def get_context_data(self, **kwargs):
         context = super().get_context_data(**kwargs)
-        # PrepData.generate_df(self)
-        context['elements'] = PrepData.generate_df(self)[0]
-        context['types'] = PrepData.types(self)
-        context['relationships'] = PrepData.relationships(self)
-        context['entity_ids'] = PrepData.generate_df(self)[1]
-        context['entity_names'] = PrepData.generate_df(self)[2]
+        prep = PrepData(project=self.project)
+        prep.generate_df()
+        context['elements'] = prep.elements
+        context['types'] = prep.entity_types_list
+        context['relationships'] = prep.rel_names_list
+        context['entity_ids'] = prep.entity_ids_list
+        context['entity_names'] = prep.entity_names_list
         return context
 
     def post(self, request, project_id, *args, **kwargs):
@@ -39,45 +39,6 @@ def post(self, request, project_id, *args, **kwargs):
 
         return JsonResponse({'entity': table_data})
 
-    def all_values(self, dict_obj, parent_key=''):
-        """
-        Recursively yields all keys and values in a nested dictionary.
-
-        This function iterates over all key-value pairs in the given dictionary, and recursively
-        yields all keys and values in any nested dictionaries. The yielded keys are generated by
-        concatenating the parent key and the current key, separated by a dot ('.'), if applicable.
-
-        Args:
-            dict_obj (dict): The dictionary to extract keys and values from.
-            parent_key (str, optional): The parent key to be used for nested dictionaries.
-                Defaults to an empty string.
-
-        Yields:
-            tuple: A tuple containing the current key and value as (key, value).
-
-        Example:
-            >>> my_dict = {'a': 1, 'b': {'c': 2, 'd': {'e': 3}}}
-            >>> for key, value in all_values(my_dict):
-            ...     print(key, value)
-            a 1
-            b.c 2
-            b.d.e 3
-
-        """
-
-        # Iterate over all key-value pairs in the passed dictionary
-        for key, value in dict_obj.items():
-            # Generate the current key by concatenating the parent key and the current key
-            current_key = f"{parent_key}.{key}" if parent_key else key
-
-            # If value is of dictionary type then recursively yield all keys and values
-            # in that nested dictionary
-            if isinstance(value, dict):
-                yield from self.all_values(value, current_key)
-            else:
-                # Yield the current key and value as a tuple
-                yield current_key, value
-
 
 class LdVisualizer(ProjectContextMixin, TemplateView):
     templet_name = "semantics/semantics_LdVisualize.html"