From 65411d408cae6d51161e199a53e116cc3f6ca419 Mon Sep 17 00:00:00 2001 From: Alkid Date: Wed, 28 Feb 2024 16:16:12 +0100 Subject: [PATCH 1/8] updated example --- examples/example_knowledge_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/example_knowledge_base.py b/examples/example_knowledge_base.py index e85f94ff..a4ca8b82 100644 --- a/examples/example_knowledge_base.py +++ b/examples/example_knowledge_base.py @@ -38,7 +38,7 @@ print('*' * 100) # Direct concept hierarchy from Top to Bottom. -for concept in kb.class_hierarchy().items(): +for concept in kb.class_hierarchy.items(): print(f'{concept.get_iri().as_str()} => {[c.get_iri().as_str() for c in kb.get_direct_sub_concepts(concept)]}') print('*' * 100) From 2c637b7deb45cf4268f148eb9561843b47bae5cb Mon Sep 17 00:00:00 2001 From: Alkid Date: Thu, 29 Feb 2024 12:29:58 +0100 Subject: [PATCH 2/8] `clean` method now cleans cache too #337 --- ontolearn/concept_learner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ontolearn/concept_learner.py b/ontolearn/concept_learner.py index f148f78f..aa74fd4b 100644 --- a/ontolearn/concept_learner.py +++ b/ontolearn/concept_learner.py @@ -791,6 +791,7 @@ def __init__(self, self.__setup() def __setup(self): + self._cache = dict() self.clean() if self.fitness_func is None: self.fitness_func = LinearPressureFitness() @@ -810,7 +811,6 @@ def __setup(self): self._result_population = None self._dp_to_prim_type = dict() self._dp_splits = dict() - self._cache = dict() self._split_properties = [] self.pset = self.__build_primitive_set() @@ -1059,7 +1059,7 @@ def clean(self): del creator.Quality except AttributeError: pass - + self._cache.clear() super().clean() From ad3d318d1483829835962268dd355fe4fef9bd92 Mon Sep 17 00:00:00 2001 From: Alkid Date: Thu, 29 Feb 2024 13:06:55 +0100 Subject: [PATCH 3/8] Facilitated optional installation --- setup.py | 47 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 
insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index d828fe43..2bd4d451 100644 --- a/setup.py +++ b/setup.py @@ -1,14 +1,10 @@ from setuptools import setup, find_packages +import re with open('README.md', 'r') as fh: long_description = fh.read() -setup( - name="ontolearn", - description="Ontolearn is an open-source software library for structured machine learning in Python. Ontolearn includes modules for processing knowledge bases, inductive logic programming and ontology engineering.", - version="0.6.2", - packages=find_packages(), - install_requires=[ - "scikit-learn>=0.24.1", + +_deps = [ "matplotlib>=3.3.4", "owlready2>=0.40", "torch>=1.7.1", @@ -19,11 +15,44 @@ "deap>=1.3.1", "httpx>=0.25.2", "tqdm>=4.64.0", - "transformers>=4.35.0", + "transformers>=4.38.1", "pytest>=7.2.2", "owlapy==0.1.1", "dicee==0.1.2", - "ontosample>=0.2.2"], + "ontosample>=0.2.2", + "gradio>=4.11.0"] + +deps = {b: a for a, b in (re.findall(r"^(([^!=<>~ ]+)(?:[!=<>~ ].*)?$)", x)[0] for x in _deps)} + + +def deps_list(*pkgs): + return [deps[pkg] for pkg in pkgs] + + +extras = dict() +extras["min"] = deps_list( + "matplotlib", + "torch", + "rdflib", + "pandas", + "sortedcontainers", + "owlready2", + "owlapy", + "flask", # Drill, NCES + "tqdm", "transformers", # NCES + "dicee", # Drill + "deap", # Evolearner +) + +extras["full"] = (extras["min"] + deps_list("httpx", "pytest", "gradio", "ontosample")) + +setup( + name="ontolearn", + description="Ontolearn is an open-source software library for structured machine learning in Python. 
Ontolearn includes modules for processing knowledge bases, inductive logic programming and ontology engineering.", + version="0.7.0", + packages=find_packages(), + install_requires=extras["min"], + extras_require=extras, author='Caglar Demir', author_email='caglardemir8@gmail.com', url='https://github.com/dice-group/Ontolearn', From 7151cb30a02869cc2a598cc4e7725dfdac5e9ff8 Mon Sep 17 00:00:00 2001 From: Alkid Date: Thu, 29 Feb 2024 13:16:01 +0100 Subject: [PATCH 4/8] Increased version --- docs/usage/01_introduction.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage/01_introduction.md b/docs/usage/01_introduction.md index 50fbd9e6..8655af19 100644 --- a/docs/usage/01_introduction.md +++ b/docs/usage/01_introduction.md @@ -1,6 +1,6 @@ # Ontolearn -**Version:** ontolearn 0.6.1 +**Version:** ontolearn 0.7.0 **GitHub repository:** [https://github.com/dice-group/Ontolearn](https://github.com/dice-group/Ontolearn) From f58195aa81388885d26249e67f2ac13bdb8aca8c Mon Sep 17 00:00:00 2001 From: Alkid Date: Fri, 1 Mar 2024 13:19:54 +0100 Subject: [PATCH 5/8] EvoLearner resets after each `fit` call #337 --- ontolearn/concept_learner.py | 31 ++++++++++++++++++++++++------- tests/test_evolearner.py | 14 +++++++++----- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/ontolearn/concept_learner.py b/ontolearn/concept_learner.py index aa74fd4b..9a8bfbc4 100644 --- a/ontolearn/concept_learner.py +++ b/ontolearn/concept_learner.py @@ -689,7 +689,7 @@ class EvoLearner(BaseConceptLearner[EvoLearnerNode]): __slots__ = 'fitness_func', 'init_method', 'algorithm', 'value_splitter', 'tournament_size', \ 'population_size', 'num_generations', 'height_limit', 'use_data_properties', 'pset', 'toolbox', \ '_learning_problem', '_result_population', 'mut_uniform_gen', '_dp_to_prim_type', '_dp_splits', \ - '_split_properties', '_cache', 'use_card_restrictions', 'card_limit', 'use_inverse' + '_split_properties', '_cache', 'use_card_restrictions', 'card_limit', 
'use_inverse', 'total_fits' name = 'evolearner' @@ -788,11 +788,12 @@ def __init__(self, self.population_size = population_size self.num_generations = num_generations self.height_limit = height_limit + self.total_fits = 0 self.__setup() def __setup(self): + self.clean(partial=True) self._cache = dict() - self.clean() if self.fitness_func is None: self.fitness_func = LinearPressureFitness() @@ -971,7 +972,11 @@ def fit(self, *args, **kwargs) -> 'EvoLearner': """ Find hypotheses that explain pos and neg. """ - self.clean() + # Don't reset everything if the user is just using this model for 1 learning problem, since he may use the + # register_op method, else-wise we need to `clean` before fitting to get a fresh fit. + if self.total_fits > 0: + self.clean() + self.total_fits += 1 learning_problem = self.construct_learning_problem(PosNegLPStandard, args, kwargs) self._learning_problem = learning_problem.encode_kb(self.kb) @@ -1049,9 +1054,7 @@ def _fitness_func(self, individual: Tree): self._cache[ind_str] = (e.q, individual.fitness.values[0]) self._number_of_tested_concepts += 1 - def clean(self): - self._result_population = None - + def clean(self, partial: bool = False): # Resets classes if they already exist, names must match the ones that were created in the toolbox try: del creator.Fitness @@ -1059,8 +1062,22 @@ def clean(self): del creator.Quality except AttributeError: pass - self._cache.clear() super().clean() + if not partial: + # Reset everything if fitting more than one lp. Tests have shown that this is necessary to get the + # best performance of EvoLearner. 
+ self._result_population = None + self._cache.clear() + self.fitness_func = LinearPressureFitness() + self.init_method = EARandomWalkInitialization() + self.algorithm = EASimple() + self.mut_uniform_gen = EARandomInitialization(min_height=1, max_height=3) + self.value_splitter = EntropyValueSplitter() + self._dp_to_prim_type = dict() + self._dp_splits = dict() + self._split_properties = [] + self.pset = self.__build_primitive_set() + self.toolbox = self.__build_toolbox() class NCES(BaseNCES): diff --git a/tests/test_evolearner.py b/tests/test_evolearner.py index 70d64062..4bd0b0a9 100644 --- a/tests/test_evolearner.py +++ b/tests/test_evolearner.py @@ -18,9 +18,9 @@ def test_regression_family(self): kb = KnowledgeBase(path=settings['data_path'][3:]) model = EvoLearner(knowledge_base=kb, max_runtime=10) - regression_test_evolearner = {'Aunt': 0.9, 'Brother': 1.0, - 'Cousin': 0.9, 'Granddaughter': 1.0, - 'Uncle': 0.9, 'Grandgrandfather': 0.94} + regression_test_evolearner = {'Aunt': 1.0, 'Brother': 1.0, + 'Cousin': 1.0, 'Granddaughter': 1.0, + 'Uncle': 1.0, 'Grandgrandfather': 1.0} for str_target_concept, examples in settings['problems'].items(): pos = set(map(OWLNamedIndividual, map(IRI.create, set(examples['positive_examples'])))) neg = set(map(OWLNamedIndividual, map(IRI.create, set(examples['negative_examples'])))) @@ -31,8 +31,12 @@ def test_regression_family(self): self.assertEqual(returned_model, model) hypotheses = list(returned_model.best_hypotheses(n=3)) self.assertGreaterEqual(hypotheses[0].quality, regression_test_evolearner[str_target_concept]) - self.assertGreaterEqual(hypotheses[0].quality, hypotheses[1].quality) - self.assertGreaterEqual(hypotheses[1].quality, hypotheses[2].quality) + # best_hypotheses returns distinct hypotheses and sometimes the model will not find 'n' distinct hypothesis, + # hence the checks + if len(hypotheses) == 2: + self.assertGreaterEqual(hypotheses[0].quality, hypotheses[1].quality) + if len(hypotheses) == 3: + 
self.assertGreaterEqual(hypotheses[1].quality, hypotheses[2].quality) def test_regression_mutagenesis_multiple_fits(self): kb = KnowledgeBase(path='KGs/Mutagenesis/mutagenesis.owl') From ae62f8e369bb6a934f72d30358b53ceac5460c7c Mon Sep 17 00:00:00 2001 From: Jean-KOUAGOU Date: Mon, 4 Mar 2024 14:17:00 +0100 Subject: [PATCH 6/8] added CLIP --- README.md | 52 ++-- examples/clip_notebook.ipynb | 234 ++++++++++++++++++ examples/concept_learning_cv_evaluation.py | 64 ++++- ontolearn/clip_architectures.py | 117 +++++++++ ontolearn/clip_trainer.py | 138 +++++++++++ ontolearn/concept_learner.py | 274 ++++++++++++++++++++- ontolearn/data_struct.py | 82 +++++- ontolearn/nces_trainer.py | 10 +- 8 files changed, 919 insertions(+), 52 deletions(-) create mode 100644 examples/clip_notebook.ipynb create mode 100644 ontolearn/clip_architectures.py create mode 100644 ontolearn/clip_trainer.py diff --git a/README.md b/README.md index 8a445c06..1c78cae1 100644 --- a/README.md +++ b/README.md @@ -109,44 +109,44 @@ Note that F1 scores denote the quality of the find/constructed concept w.r.t. 
E^ ### Family Benchmark Results -| LP | Train-F1-OCEL | Test-F1-OCEL | RT-OCEL | Train-F1-CELOE | Test-F1-CELOE | RT-CELOE | Train-F1-Evo | Test-F1-Evo | RT-Evo | Train-F1-DRILL | Test-F1-DRILL | RT-DRILL | Train-F1-TDL | Test-F1-TDL | RT-TDL | Train-F1-NCES | Test-F1-NCES | RT-NCES | -|:-------------------|----------------:|---------------:|----------:|-----------------:|----------------:|-----------:|---------------:|--------------:|---------:|-----------------:|----------------:|-----------:|---------------:|--------------:|---------:|----------------:|---------------:|----------:| -| Aunt | 0.848 | 0.637 | 8.923 | 0.918 | 0.855 | 8.923 | 1.000 | 0.986 | 1.849 | 0.868 | 0.820 | 10.195 | 0.960 | 0.960 | 7.214 | 0.715 | 0.712 | 0.363 | -| Brother | 1.000 | 1.000 | 0.009 | 1.000 | 1.000 | 0.009 | 1.000 | 1.000 | 0.380 | 1.000 | 1.000 | 0.011 | 1.000 | 1.000 | 7.018 | 0.946 | 0.967 | 0.337 | -| Cousin | 0.740 | 0.708 | 7.096 | 0.796 | 0.789 | 7.096 | 1.000 | 0.993 | 2.247 | 0.826 | 0.779 | 10.150 | 0.977 | 0.951 | 8.215 | 0.667 | 0.667 | 0.333 | -| Daughter | 1.000 | 1.000 | 0.008 | 1.000 | 1.000 | 0.008 | 1.000 | 1.000 | 0.331 | 1.000 | 1.000 | 0.013 | 1.000 | 1.000 | 7.410 | 0.992 | 0.983 | 0.316 | -| Father | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.422 | 1.000 | 1.000 | 0.005 | 1.000 | 1.000 | 7.369 | 0.937 | 0.935 | 0.290 | -| Granddaughter | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.361 | 1.000 | 1.000 | 0.004 | 1.000 | 1.000 | 7.254 | 0.924 | 0.941 | 0.320 | -| Grandfather | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.347 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 7.180 | 0.709 | 0.727 | 0.334 | -| Grandgranddaughter | 1.000 | 1.000 | 0.005 | 1.000 | 1.000 | 0.005 | 1.000 | 1.000 | 0.285 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 6.651 | 0.860 | 0.873 | 0.300 | -| Grandgrandfather | 1.000 | 1.000 | 0.464 | 1.000 | 1.000 | 0.464 | 1.000 | 1.000 | 0.287 | 1.000 | 1.000 | 0.116 | 0.953 | 0.947 
| 6.656 | 0.768 | 0.793 | 0.337 | -| Grandgrandmother | 1.000 | 1.000 | 3.018 | 1.000 | 1.000 | 3.018 | 1.000 | 1.000 | 0.274 | 1.000 | 1.000 | 0.116 | 0.944 | 0.947 | 6.713 | 0.706 | 0.703 | 0.308 | -| Grandgrandson | 1.000 | 1.000 | 1.127 | 1.000 | 1.000 | 1.127 | 1.000 | 1.000 | 0.346 | 1.000 | 1.000 | 0.025 | 0.940 | 0.911 | 6.945 | 0.860 | 0.909 | 0.289 | -| Grandmother | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.365 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 7.033 | 0.761 | 0.764 | 0.299 | -| Grandson | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.394 | 1.000 | 1.000 | 0.004 | 1.000 | 1.000 | 7.071 | 0.908 | 0.924 | 0.342 | -| Mother | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.412 | 1.000 | 1.000 | 0.005 | 1.000 | 1.000 | 7.524 | 0.977 | 0.978 | 0.343 | -| PersonWithASibling | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.365 | 0.737 | 0.725 | 10.203 | 1.000 | 1.000 | 7.473 | 0.925 | 0.941 | 0.349 | -| Sister | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.341 | 1.000 | 1.000 | 0.009 | 1.000 | 1.000 | 7.038 | 0.879 | 0.894 | 0.330 | -| Son | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 0.349 | 1.000 | 1.000 | 0.004 | 1.000 | 1.000 | 7.232 | 0.927 | 0.893 | 0.316 | -| Uncle | 0.903 | 0.891 | 10.118 | 0.907 | 0.891 | 10.118 | 1.000 | 0.967 | 1.451 | 0.928 | 0.908 | 10.089 | 0.926 | 0.918 | 7.160 | 0.688 | 0.693 | 0.345 | +| LP | Train-F1-OCEL | Test-F1-OCEL | RT-OCEL | Train-F1-CELOE | Test-F1-CELOE | RT-CELOE | Train-F1-Evo | Test-F1-Evo | RT-Evo | Train-F1-DRILL | Test-F1-DRILL | RT-DRILL | Train-F1-TDL | Test-F1-TDL | RT-TDL | Train-F1-NCES | Test-F1-NCES | RT-NCES | Train-F1-CLIP | Test-F1-CLIP | RT-CLIP | 
+|:-------------------|----------------:|---------------:|----------:|-----------------:|----------------:|-----------:|---------------:|--------------:|---------:|-----------------:|----------------:|-----------:|---------------:|--------------:|---------:|----------------:|---------------:|----------:|----------------:|---------------:|----------:| +| Aunt | 0.848 | 0.637 | 9.206 | 0.918 | 0.855 | 9.206 | 0.996 | 0.969 | 3.390 | 0.886 | 0.799 | 60.243 | 0.971 | 0.949 | 6.366 | 0.721 | 0.635 | 0.552 | 0.899 | 0.891 | 5.763 | +| Brother | 1.000 | 1.000 | 0.005 | 1.000 | 1.000 | 0.005 | 1.000 | 1.000 | 0.281 | 1.000 | 1.000 | 0.020 | 1.000 | 1.000 | 6.216 | 0.978 | 0.975 | 0.450 | 1.000 | 1.000 | 0.692 | +| Cousin | 0.740 | 0.708 | 7.336 | 0.796 | 0.789 | 7.336 | 1.000 | 1.000 | 1.653 | 0.831 | 0.784 | 60.416 | 0.978 | 0.941 | 7.073 | 0.667 | 0.667 | 0.465 | 0.774 | 0.761 | 6.671 | +| Daughter | 1.000 | 1.000 | 0.006 | 1.000 | 1.000 | 0.006 | 1.000 | 1.000 | 0.309 | 1.000 | 1.000 | 0.033 | 1.000 | 1.000 | 6.459 | 0.993 | 0.977 | 0.534 | 1.000 | 1.000 | 0.716 | +| Father | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.411 | 1.000 | 1.000 | 0.004 | 1.000 | 1.000 | 6.522 | 0.897 | 0.903 | 0.448 | 1.000 | 1.000 | 0.588 | +| Granddaughter | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.320 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 6.233 | 0.911 | 0.916 | 0.497 | 1.000 | 1.000 | 0.646 | +| Grandfather | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.314 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 6.185 | 0.743 | 0.717 | 0.518 | 1.000 | 1.000 | 0.721 | +| Grandgranddaughter | 1.000 | 1.000 | 0.004 | 1.000 | 1.000 | 0.004 | 1.000 | 1.000 | 0.293 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 5.858 | 0.837 | 0.840 | 0.518 | 1.000 | 1.000 | 0.710 | +| Grandgrandfather | 1.000 | 1.000 | 0.668 | 1.000 | 1.000 | 0.668 | 1.000 | 1.000 | 0.341 | 1.000 | 1.000 | 0.243 | 0.951 | 0.947 | 5.915 | 0.759 | 0.677 | 0.511 | 1.000 | 1.000 
| 1.964 | +| Grandgrandmother | 1.000 | 1.000 | 0.381 | 1.000 | 1.000 | 0.381 | 1.000 | 1.000 | 0.258 | 1.000 | 1.000 | 0.243 | 0.944 | 0.947 | 5.918 | 0.721 | 0.687 | 0.498 | 0.997 | 1.000 | 2.620 | +| Grandgrandson | 1.000 | 1.000 | 0.341 | 1.000 | 1.000 | 0.341 | 1.000 | 1.000 | 0.276 | 1.000 | 1.000 | 0.122 | 0.938 | 0.911 | 6.093 | 0.779 | 0.809 | 0.460 | 1.000 | 1.000 | 2.555 | +| Grandmother | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.385 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 6.135 | 0.762 | 0.725 | 0.480 | 1.000 | 1.000 | 0.628 | +| Grandson | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.299 | 1.000 | 1.000 | 0.003 | 1.000 | 1.000 | 6.301 | 0.896 | 0.903 | 0.552 | 1.000 | 1.000 | 0.765 | +| Mother | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.327 | 1.000 | 1.000 | 0.004 | 1.000 | 1.000 | 6.570 | 0.967 | 0.972 | 0.555 | 1.000 | 1.000 | 0.779 | +| PersonWithASibling | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.377 | 0.737 | 0.725 | 60.194 | 1.000 | 1.000 | 6.548 | 0.927 | 0.928 | 0.648 | 1.000 | 1.000 | 0.999 | +| Sister | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.356 | 1.000 | 1.000 | 0.017 | 1.000 | 1.000 | 6.315 | 0.866 | 0.876 | 0.512 | 1.000 | 1.000 | 0.616 | +| Son | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.002 | 1.000 | 1.000 | 0.317 | 1.000 | 1.000 | 0.004 | 1.000 | 1.000 | 6.579 | 0.892 | 0.855 | 0.537 | 1.000 | 1.000 | 0.700 | +| Uncle | 0.903 | 0.891 | 12.441 | 0.907 | 0.891 | 12.441 | 1.000 | 0.971 | 1.675 | 0.951 | 0.894 | 60.337 | 0.894 | 0.896 | 6.310 | 0.667 | 0.665 | 0.619 | 0.928 | 0.942 | 5.577 | ### Mutagenesis Benchmark Results ```shell python examples/concept_learning_evaluation.py --lps LPs/Mutagenesis/lps.json --kb KGs/Mutagenesis/mutagenesis.owl --max_runtime 60 --report mutagenesis_results.csv && python -c 'import pandas as pd; print(pd.read_csv("mutagenesis_results.csv", index_col=0).to_markdown(floatfmt=".3f"))' 
``` -| LP | F1-OCEL | RT-OCEL | F1-CELOE | RT-CELOE | F1-Evo | RT-Evo | F1-DRILL | RT-DRILL | F1-TDL | RT-TDL | -|:---------|----------:|----------:|-----------:|-----------:|---------:|---------:|-----------:|-----------:|---------:|---------:| -| NotKnown | 0.916 | 60.226 | 0.916 | 41.243 | 0.976 | 40.411 | 0.704 | 60.044 | 1.000 | 49.022 | +| LP | Train-F1-OCEL | Test-F1-OCEL | RT-OCEL | Train-F1-CELOE | Test-F1-CELOE | RT-CELOE | Train-F1-Evo | Test-F1-Evo | RT-Evo | Train-F1-DRILL | Test-F1-DRILL | RT-DRILL | Train-F1-TDL | Test-F1-TDL | RT-TDL | Train-F1-NCES | Test-F1-NCES | RT-NCES | Train-F1-CLIP | Test-F1-CLIP | RT-CLIP | +|:---------|----------------:|---------------:|----------:|-----------------:|----------------:|-----------:|---------------:|--------------:|---------:|-----------------:|----------------:|-----------:|---------------:|--------------:|---------:|----------------:|---------------:|----------:|----------------:|---------------:|----------:| +| NotKnown | 0.916 | 0.918 | 58.328 | 0.916 | 0.918 | 58.328 | 0.724 | 0.729 | 49.281 | 0.704 | 0.704 | 60.052 | 0.879 | 0.771 | 7.763 | 0.564 | 0.560 | 0.493 | 0.814 | 0.807 | 5.622 | ### Carcinogenesis Benchmark Results ```shell python examples/concept_learning_evaluation.py --lps LPs/Carcinogenesis/lps.json --kb KGs/Carcinogenesis/carcinogenesis.owl --max_runtime 60 --report carcinogenesis_results.csv && python -c 'import pandas as pd; print(pd.read_csv("carcinogenesis_results.csv", index_col=0).to_markdown(floatfmt=".3f"))' ``` -| LP | F1-OCEL | RT-OCEL | F1-CELOE | RT-CELOE | F1-Evo | RT-Evo | F1-DRILL | RT-DRILL | F1-TDL | RT-TDL | -|:---------|----------:|----------:|-----------:|-----------:|---------:|---------:|-----------:|-----------:|---------:|---------:| -| NOTKNOWN | 0.739 | 64.975 | 0.739 | 60.004 | 0.814 | 60.758 | 0.705 | 60.066 | 1.000 | 56.701 | +| LP | Train-F1-OCEL | Test-F1-OCEL | RT-OCEL | Train-F1-CELOE | Test-F1-CELOE | RT-CELOE | Train-F1-Evo | Test-F1-Evo | RT-Evo | 
Train-F1-DRILL | Test-F1-DRILL | RT-DRILL | Train-F1-TDL | Test-F1-TDL | RT-TDL | Train-F1-NCES | Test-F1-NCES | RT-NCES | Train-F1-CLIP | Test-F1-CLIP | RT-CLIP | +|:---------|----------------:|---------------:|----------:|-----------------:|----------------:|-----------:|---------------:|--------------:|---------:|-----------------:|----------------:|-----------:|---------------:|--------------:|---------:|----------------:|---------------:|----------:|----------------:|---------------:|----------:| +| NOTKNOWN | 0.738 | 0.711 | 42.936 | 0.740 | 0.701 | 42.936 | 0.744 | 0.733 | 63.465 | 0.705 | 0.704 | 60.069 | 0.879 | 0.682 | 7.260 | 0.415 | 0.396 | 1.911 | 0.720 | 0.700 | 85.037 | diff --git a/examples/clip_notebook.ipynb b/examples/clip_notebook.ipynb new file mode 100644 index 00000000..bc98619b --- /dev/null +++ b/examples/clip_notebook.ipynb @@ -0,0 +1,234 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "blond-letter", + "metadata": {}, + "source": [ + "# CLIP Notebook\n", + "This is a jupyter notebook file to execute [CLIP](ontolearn.concept_learner.CLIP) and generate predictive results. We recommend you to see the [concept learners](../docs/usage/06_concept_learners.md) guide before continuing with the execution.\n", + "Also if you have not done it already, from the main directory \"Ontolearn\", run the commands for Datasets mentioned [here](https://ontolearn-docs-dice-group.netlify.app/usage/02_installation#download-external-files) to download the datasets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "japanese-ivory", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Warning: SQLite3 version 3.40.0 and 3.41.2 have huge performance regressions; please install version 3.41.1 or 3.42!\n", + "\n" + ] + } + ], + "source": [ + "import json\n", + "from ontolearn.knowledge_base import KnowledgeBase\n", + "from ontolearn.concept_learner import CLIP\n", + "from ontolearn.refinement_operators import ExpressRefinement\n", + "from ontolearn.learning_problem import PosNegLPStandard\n", + "from owlapy.model import OWLNamedIndividual, IRI\n", + "from ontolearn.utils import setup_logging\n" + ] + }, + { + "cell_type": "markdown", + "id": "pending-coast", + "metadata": {}, + "source": [ + "Open `uncle_lp.json` where we have stored the learning problem for the concept of 'Uncle' and the path to the 'family' ontology." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "beginning-syntax", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "with open('uncle_lp.json') as json_file:\n", + " settings = json.load(json_file)" + ] + }, + { + "cell_type": "markdown", + "id": "humanitarian-heating", + "metadata": {}, + "source": [ + "Create an instance of the class `KnowledeBase` by using the path that is stored in `settings`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "caroline-indiana", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "kb = KnowledgeBase(path=settings['data_path'])" + ] + }, + { + "cell_type": "markdown", + "id": "lucky-activation", + "metadata": {}, + "source": [ + "Retreive the IRIs of the positive and negative examples of Uncle from `settings` and create an instance of `PosNegLPStandard`. 
(more info about this [here](../docs/usage/06_concept_learners.md#configure-the-learning-problem))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "processed-patrick", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "examples = settings['Uncle']\n", + "p = set(examples['positive_examples'])\n", + "n = set(examples['negative_examples'])\n", + "typed_pos = set(map(OWLNamedIndividual, map(IRI.create, p)))\n", + "typed_neg = set(map(OWLNamedIndividual, map(IRI.create, n)))\n", + "lp = PosNegLPStandard(pos=typed_pos, neg=typed_neg)" + ] + }, + { + "cell_type": "markdown", + "id": "mechanical-latin", + "metadata": {}, + "source": [ + "Create a model of [CLIP](ontolearn.concept_learner.CLIP) and fit the learning problem to the model." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "171d1aa4-6c12-42c0-b7e9-8cf2dce85ff9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "op = ExpressRefinement(knowledge_base=kb, use_inverse=False,\n", + " use_numeric_datatypes=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "binding-moderator", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Loaded length predictor!\n", + "\n", + " Loaded length predictor!\n", + "\n", + " Loaded length predictor!\n", + "\n", + " Loaded length predictor!\n", + "\n", + "***** Predicted length: 5 *****\n", + "\n", + "***** Predicted length: 5 *****\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = CLIP(knowledge_base=kb, path_of_embeddings=\"../CLIPData/family/embeddings/ConEx_entity_embeddings.csv\",\n", + " refinement_operator=op, load_pretrained=True, max_runtime=200)\n", + "model.fit(lp)" + ] + }, + { + "cell_type": "markdown", + "id": "d981f2b9-3489-494e-825d-6a72ee480d4f", + "metadata": {}, + "source": [ + 
"## Retrieve top 3 hypotheses and print them." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c6a90b21-3594-441d-bed0-eb822db5f993", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " at 0x0304774\tMale ⊓ (∀ hasParent.Grandparent)\tQuality:0.90476\tHeuristic:0.40407\tDepth:2\tH_exp:6\t|RC|:7\t|Indv.|:None\n", + " at 0x0ca154a\tMale ⊓ (∀ hasChild.Grandchild)\tQuality:0.90476\tHeuristic:0.36919\tDepth:1\tH_exp:7\t|RC|:7\t|Indv.|:None\n", + " at 0x2adbb89\tMale ⊓ (∀ hasChild.(¬Grandfather))\tQuality:0.88889\tHeuristic:0.39044\tDepth:3\tH_exp:6\t|RC|:0\t|Indv.|:None\n" + ] + }, + { + "data": { + "text/plain": [ + "[None, None, None]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "hypotheses = list(model.best_hypotheses(n=3))\n", + "[print(_) for _ in hypotheses]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "onto", + "language": "python", + "name": "onto" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/concept_learning_cv_evaluation.py b/examples/concept_learning_cv_evaluation.py index acab30f0..c9fb4c61 100644 --- a/examples/concept_learning_cv_evaluation.py +++ b/examples/concept_learning_cv_evaluation.py @@ -12,7 +12,8 @@ import time import pandas as pd from ontolearn.knowledge_base import KnowledgeBase -from ontolearn.concept_learner import CELOE, OCEL, EvoLearner, NCES +from ontolearn.concept_learner import CELOE, OCEL, EvoLearner, NCES, CLIP +from ontolearn.refinement_operators import ExpressRefinement from ontolearn.learners import Drill, TDL from ontolearn.learning_problem import PosNegLPStandard from 
ontolearn.metrics import F1 @@ -32,13 +33,13 @@ def dl_concept_learning(args): settings = json.load(json_file) kb = KnowledgeBase(path=args.kb) - ocel = OCEL(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(), + ocel = OCEL(knowledge_base=kb, quality_func=F1(), max_runtime=args.max_runtime) - celoe = CELOE(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(), + celoe = CELOE(knowledge_base=kb, quality_func=F1(), max_runtime=args.max_runtime) - drill = Drill(knowledge_base=KnowledgeBase(path=args.kb), path_pretrained_kge=args.path_pretrained_kge, + drill = Drill(knowledge_base=kb, path_pretrained_kge=args.path_pretrained_kge, quality_func=F1(), max_runtime=args.max_runtime) - tdl = TDL(knowledge_base=KnowledgeBase(path=args.kb), + tdl = TDL(knowledge_base=kb, dataframe_triples=pd.DataFrame( data=sorted([(str(s), str(p), str(o)) for s, p, o in Graph().parse(args.kb)], key=lambda x: len(x)), columns=['subject', 'relation', 'object'], dtype=str), @@ -46,13 +47,27 @@ def dl_concept_learning(args): max_runtime=args.max_runtime) nces = NCES(knowledge_base_path=args.kb, quality_func=F1(), path_of_embeddings=args.path_of_nces_embeddings, pretrained_model_name=["LSTM", "GRU", "SetTransformer"], num_predictions=5) + + express_rho = ExpressRefinement(kb, use_inverse=False, use_numeric_datatypes=False) + clip = CLIP(knowledge_base=kb, refinement_operator=express_rho, quality_func=F1(), + max_num_of_concepts_tested=int(1e9), max_runtime=args.max_runtime, + path_of_embeddings=args.path_of_clip_embeddings, + pretrained_predictor_name=["LSTM", "GRU", "SetTransformer", "CNN"], load_pretrained=True) # dictionary to store the data data = dict() - for str_target_concept, examples in settings['problems'].items(): + if "problems" in settings: + problems = settings['problems'].items() + positives_key = "positive_examples" + negatives_key = "negative_examples" + else: + problems = settings.items() + positives_key = "positive examples" + negatives_key = "negative 
examples" + for str_target_concept, examples in problems: print('Target concept: ', str_target_concept) - p = examples['positive_examples'] - n = examples['negative_examples'] + p = examples[positives_key] + n = examples[negatives_key] kf = StratifiedKFold(n_splits=args.folds, shuffle=True, random_state=args.random_seed) X = np.array(p + n) @@ -67,16 +82,16 @@ def dl_concept_learning(args): train_neg = {neg_individual for neg_individual in X[train_index][y[train_index] == 0]} # Sanity checking for individuals used for training. - assert train_pos.issubset(examples['positive_examples']) - assert train_neg.issubset(examples['negative_examples']) + assert train_pos.issubset(examples[positives_key]) + assert train_neg.issubset(examples[negatives_key]) # () Extract positive and negative examples from test fold test_pos = {pos_individual for pos_individual in X[test_index][y[test_index] == 1]} test_neg = {neg_individual for neg_individual in X[test_index][y[test_index] == 0]} # Sanity checking for individuals used for testing. 
- assert test_pos.issubset(examples['positive_examples']) - assert test_neg.issubset(examples['negative_examples']) + assert test_pos.issubset(examples[positives_key]) + assert test_neg.issubset(examples[negatives_key]) train_lp = PosNegLPStandard(pos=set(map(OWLNamedIndividual, map(IRI.create, train_pos))), neg=set(map(OWLNamedIndividual, map(IRI.create, train_neg)))) @@ -217,6 +232,28 @@ def dl_concept_learning(args): print(f"NCES Train Quality: {train_f1_nces:.3f}", end="\t") print(f"NCES Test Quality: {test_f1_nces:.3f}", end="\t") print(f"NCES Runtime: {rt_nces:.3f}") + + + print("CLIP starts..", end="\t") + start_time = time.time() + pred_clip = clip.fit(train_lp).best_hypotheses(n=1) + rt_clip = time.time() - start_time + print("CLIP ends..", end="\t") + # () Quality on the training data + train_f1_clip = compute_f1_score(individuals={i for i in kb.individuals(pred_clip.concept)}, + pos=train_lp.pos, + neg=train_lp.neg) + # () Quality on test data + test_f1_clip = compute_f1_score(individuals={i for i in kb.individuals(pred_clip.concept)}, + pos=test_lp.pos, + neg=test_lp.neg) + + data.setdefault("Train-F1-CLIP", []).append(train_f1_clip) + data.setdefault("Test-F1-CLIP", []).append(test_f1_clip) + data.setdefault("RT-CLIP", []).append(rt_clip) + print(f"CLIP Train Quality: {train_f1_clip:.3f}", end="\t") + print(f"CLIP Test Quality: {test_f1_clip:.3f}", end="\t") + print(f"CLIP Runtime: {rt_clip:.3f}") df = pd.DataFrame.from_dict(data) df.to_csv(args.report, index=False) @@ -227,12 +264,13 @@ def dl_concept_learning(args): if __name__ == '__main__': parser = argparse.ArgumentParser(description='Description Logic Concept Learning') parser.add_argument("--max_runtime", type=int, default=10, help="Max runtime") - parser.add_argument("--lps", type=str, required=True, help="Path fto the learning problems") + parser.add_argument("--lps", type=str, required=True, help="Path to the learning problems") parser.add_argument("--folds", type=int, default=10, 
help="Number of folds of cross validation.") parser.add_argument("--kb", type=str, required=True, help="Knowledge base") parser.add_argument("--path_pretrained_kge", type=str, default=None) parser.add_argument("--path_of_nces_embeddings", type=str, default=None) + parser.add_argument("--path_of_clip_embeddings", type=str, default=None) parser.add_argument("--report", type=str, default="report.csv") parser.add_argument("--random_seed", type=int, default=1) dl_concept_learning(parser.parse_args()) \ No newline at end of file diff --git a/ontolearn/clip_architectures.py b/ontolearn/clip_architectures.py new file mode 100644 index 00000000..ac210f6a --- /dev/null +++ b/ontolearn/clip_architectures.py @@ -0,0 +1,117 @@ +import torch, torch.nn as nn +import random +from typing import List +from ontolearn.nces_modules import * + +class LengthLearner_LSTM(nn.Module): + """LSTM architecture""" + def __init__(self, input_size, output_size, proj_dim=256, rnn_n_layers=2, drop_prob=0.2): + super().__init__() + self.name = 'LSTM' + self.loss = nn.CrossEntropyLoss() + self.lstm = nn.LSTM(input_size, proj_dim, rnn_n_layers, + dropout=drop_prob, batch_first=True) + self.dropout = nn.Dropout(drop_prob) + self.fc1 = nn.Linear(2*proj_dim, proj_dim) + self.fc2 = nn.Linear(proj_dim, proj_dim) + self.fc3 = nn.Linear(proj_dim, output_size) + + def forward(self, x1, x2): + ''' Forward pass through the network.''' + x1, _ = self.lstm(x1) + x1 = x1.sum(1).contiguous().view(x1.shape[0], -1) + x2, _ = self.lstm(x2) + x2 = x2.sum(1).contiguous().view(x2.shape[0], -1) + x = torch.cat([x1, x2], dim=-1) + x = self.fc1(x) + x = torch.selu(x) + x = self.dropout(x) + x = self.fc2(x) + x = x + torch.tanh(x) + x = self.fc3(x) + return x + +class LengthLearner_GRU(nn.Module): + """GRU architecture""" + def __init__(self, input_size, output_size, proj_dim=256, rnn_n_layers=2, drop_prob=0.2): + super().__init__() + self.name = 'GRU' + self.loss = nn.CrossEntropyLoss() + self.gru = nn.GRU(input_size, 
proj_dim, rnn_n_layers, + dropout=drop_prob, batch_first=True) + self.dropout = nn.Dropout(drop_prob) + self.fc1 = nn.Linear(2*proj_dim, proj_dim) + self.fc2 = nn.Linear(proj_dim, proj_dim) + self.fc3 = nn.Linear(proj_dim, output_size) + + def forward(self, x1, x2): + ''' Forward pass through the network.''' + x1, _ = self.gru(x1) + x1 = x1.sum(1).contiguous().view(x1.shape[0], -1) + x2, _ = self.gru(x2) + x2 = x2.sum(1).contiguous().view(x2.shape[0], -1) + x = torch.cat([x1, x2], dim=-1) + x = self.fc1(x) + x = torch.selu(x) + x = self.dropout(x) + x = self.fc2(x) + x = x + torch.tanh(x) + x = self.fc3(x) + return x + + +class LengthLearner_CNN(nn.Module): + """CNN architecture""" + def __init__(self, input_size, output_size, num_examples, proj_dim=256, kernel_size: list=[[5,7], [5,7]], stride: list=[[3,3], [3,3]], drop_prob=0.2): + super().__init__() + assert isinstance(kernel_size, list) and isinstance(kernel_size[0], list), "kernel size and stride must be lists of lists, e.g., [[5,7], [5,7]]" + self.name = 'CNN' + self.loss = nn.CrossEntropyLoss() + self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=(kernel_size[0][0], kernel_size[0][1]), stride=(stride[0][0], stride[0][1]), padding=(0,0)) + self.conv2 = nn.Conv2d(in_channels=4, out_channels=8, kernel_size=(kernel_size[1][0], kernel_size[1][1]), stride=(stride[1][0], stride[1][1]), padding=(0,0)) + self.dropout1d = nn.Dropout(drop_prob) + self.dropout2d = nn.Dropout2d(drop_prob) + conv_out_dim = 3536 + self.fc1 = nn.Linear(conv_out_dim, proj_dim) + self.fc2 = nn.Linear(proj_dim, proj_dim) + self.fc3 = nn.Linear(proj_dim, output_size) + + def forward(self, x1, x2): + ''' Forward pass through the network.''' + x1 = x1.unsqueeze(1) + x2 = x2.unsqueeze(1) + x = torch.cat([x1, x2], dim=-2) + x = self.conv1(x) + x = torch.selu(x) + x = self.dropout2d(x) + x = self.conv2(x) + x = x.view(x.shape[0], -1) + x = self.fc1(x) + x = torch.selu(x) + x = self.dropout1d(x) + x = self.fc2(x) + x = x + 
torch.tanh(x) + x = self.fc3(x) + return x + + +class LengthLearner_SetTransformer(nn.Module): + """SetTransformer architecture.""" + def __init__(self, input_size, output_size, proj_dim=256, num_heads=4, num_seeds=1, num_inds=32): + super().__init__() + self.name = 'SetTransformer' + self.loss = nn.CrossEntropyLoss() + self.enc = nn.Sequential( + ISAB(input_size, proj_dim, num_heads, num_inds), + ISAB(proj_dim, proj_dim, num_heads, num_inds)) + self.dec = nn.Sequential( + PMA(proj_dim, num_heads, num_seeds), + nn.Linear(proj_dim, output_size)) + + def forward(self, x1, x2): + ''' Forward pass through the network.''' + x1 = self.enc(x1) + x2 = self.enc(x2) + x = torch.cat([x1, x2], dim=-2) + x = self.dec(x).squeeze() + return x \ No newline at end of file diff --git a/ontolearn/clip_trainer.py b/ontolearn/clip_trainer.py new file mode 100644 index 00000000..96421524 --- /dev/null +++ b/ontolearn/clip_trainer.py @@ -0,0 +1,138 @@ +import numpy as np +import copy +import torch +from tqdm import trange +from collections import defaultdict +import os +import json +from torch.optim.lr_scheduler import ExponentialLR +from torch.nn import functional as F +from torch.nn.utils import clip_grad_value_ +from torch.nn.utils.rnn import pad_sequence +from sklearn.metrics import f1_score, accuracy_score +import time + + + +class CLIPTrainer: + """CLIP trainer.""" + def __init__(self, clip, epochs=300, learning_rate=1e-4, decay_rate=0, clip_value=5.0, + storage_path="./"): + self.clip = clip + self.epochs = epochs + self.learning_rate = learning_rate + self.decay_rate = decay_rate + self.clip_value = clip_value + self.storage_path = storage_path + + def compute_eval_metric(self, target, prediction): + f1 = 100*f1_score(target, prediction, average="micro") + acc = 100*accuracy_score(target, prediction) + return f1, acc + + def get_optimizer(self, length_predictor, optimizer='Adam'): + if optimizer == 'Adam': + return torch.optim.Adam(length_predictor.parameters(), 
lr=self.learning_rate) + elif optimizer == 'SGD': + return torch.optim.SGD(length_predictor.parameters(), lr=self.learning_rate) + elif optimizer == 'RMSprop': + return torch.optim.RMSprop(length_predictor.parameters(), lr=self.learning_rate) + else: + raise ValueError + print('Unsupported optimizer') + + def show_num_learnable_params(self): + print("*"*20+"Trainable model size"+"*"*20) + size = sum([p.numel() for p in self.clip.length_predictor.parameters()]) + size_ = 0 + print("Length Predictor: ", size) + print("*"*20+"Trainable model size"+"*"*20) + print() + return size + + def train(self, train_dataloader, save_model=True, optimizer='Adam', record_runtime=True): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + if isinstance(self.clip.length_predictor, list): + self.clip.length_predictor = copy.deepcopy(self.clip.length_predictor[0]) + model_size = self.show_num_learnable_params() + if device.type == "cpu": + print("Training on CPU, it may take long...") + else: + print("GPU available !") + print() + print("#"*50) + print() + print("{} starts training... \n".format(self.clip.length_predictor.name)) + print("#"*50, "\n") + length_predictor = copy.deepcopy(self.clip.length_predictor).train() + desc = length_predictor.name + if device.type == "cuda": + length_predictor.cuda() + opt = self.get_optimizer(length_predictor=length_predictor, optimizer=optimizer) + if self.decay_rate: + self.scheduler = ExponentialLR(opt, self.decay_rate) + Train_loss = [] + F1, Acc = [], [] + best_score = 0. 
+ if record_runtime: + t0 = time.time() + Epochs = trange(self.epochs, desc=f'Loss: {np.nan}, F1: {np.nan}, Acc: {np.nan}', leave=True) + for e in Epochs: + f1s, accs = [], [] + train_losses = [] + for x1, x2, labels in train_dataloader: + if device.type == "cuda": + x1, x2, labels = x1.cuda(), x2.cuda(), labels.cuda() + scores = length_predictor(x1, x2) + loss = length_predictor.loss(scores, labels) + predictions = scores.argmax(1).detach().cpu().numpy() + f1, acc = self.compute_eval_metric(labels.cpu().numpy(), predictions) + f1s.append(f1) + accs.append(acc) + train_losses.append(loss.item()) + opt.zero_grad() + loss.backward() + clip_grad_value_(length_predictor.parameters(), clip_value=self.clip_value) + opt.step() + if self.decay_rate: + self.scheduler.step() + F1.append(np.mean(f1s)) + Acc.append(np.mean(accs)) + Train_loss.append(np.mean(train_losses)) + Epochs.set_description('Loss: {:.4f}, F1: {:.2f}%, Acc: {:.2f}%'.format(Train_loss[-1], + F1[-1], + Acc[-1])) + Epochs.refresh() + weights = copy.deepcopy(length_predictor.state_dict()) + if Acc and Acc[-1] > best_score: + best_score = Acc[-1] + best_weights = weights + length_predictor.load_state_dict(best_weights) + if record_runtime: + duration = time.time()-t0 + runtime_info = {"Architecture": length_predictor.name, + "Number of Epochs": self.epochs, "Runtime (s)": duration} + if not os.path.exists(self.storage_path+"/runtime/"): + os.mkdir(self.storage_path+"/runtime/") + with open(self.storage_path+"/runtime/runtime"+"_"+desc+".json", "w") as file: + json.dump(runtime_info, file, indent=3) + results_dict = dict() + print("Top performance: loss: {:.4f}, f1: {:.2f}% ... 
" + "acc: {:.2f}%".format(min(Train_loss), max(F1), max(Acc)), "weights saved based on Acc best score!") + print() + results_dict.update({"Train Max F1": max(F1), "Train Acc": max(Acc), + "Train Min Loss": min(Train_loss)}) + if not os.path.exists(self.storage_path+"/results/"): + os.mkdir(self.storage_path+"/results/") + with open(self.storage_path+"/results/"+"results"+"_"+desc+".json", "w") as file: + json.dump(results_dict, file, indent=3) + if save_model: + if not os.path.exists(self.storage_path+"/trained_models/"): + os.mkdir(self.storage_path+"/trained_models/") + torch.save(length_predictor.state_dict(), self.storage_path+"/trained_models/"+"trained_"+desc+".pt") + print("{} saved".format(length_predictor.name)) + if not os.path.exists(self.storage_path+"/metrics/"): + os.mkdir(self.storage_path+"/metrics/") + with open(self.storage_path+"/metrics/"+"metrics_"+desc+".json", "w") as plot_file: + json.dump({"f1": F1, "acc": Acc, "loss": Train_loss}, plot_file, + indent=3) diff --git a/ontolearn/concept_learner.py b/ontolearn/concept_learner.py index aa74fd4b..a5cf7580 100644 --- a/ontolearn/concept_learner.py +++ b/ontolearn/concept_learner.py @@ -15,6 +15,7 @@ from torch import nn from torch.utils.data import DataLoader from torch.functional import F +from torch.nn.utils.rnn import pad_sequence from torch.nn.init import xavier_normal_ from deap import gp, tools, base, creator @@ -24,7 +25,7 @@ from ontolearn.base_concept_learner import BaseConceptLearner, RefinementBasedConceptLearner from ontolearn.base.owl.utils import EvaluatedDescriptionSet, ConceptOperandSorter, OperandSetTransform from ontolearn.data_struct import PrepareBatchOfTraining, PrepareBatchOfPrediction, NCESDataLoader, \ - NCESDataLoaderInference + NCESDataLoaderInference, CLIPDataLoader, CLIPDataLoaderInference from ontolearn.ea_algorithms import AbstractEvolutionaryAlgorithm, EASimple from ontolearn.ea_initialization import AbstractEAInitialization, EARandomInitialization, 
EARandomWalkInitialization from ontolearn.ea_utils import PrimitiveFactory, OperatorVocabulary, ToolboxVocabulary, Tree, escape, ind_to_string, \ @@ -33,7 +34,7 @@ from ontolearn.heuristics import OCELHeuristic from ontolearn.learning_problem import PosNegLPStandard, EncodedPosNegLPStandard from ontolearn.metrics import Accuracy, F1 -from ontolearn.refinement_operators import LengthBasedRefinement +from ontolearn.refinement_operators import LengthBasedRefinement, ExpressRefinement from ontolearn.search import EvoLearnerNode, NCESNode, HeuristicOrderedNode, LBLNode, OENode, TreeNode, LengthOrderedNode, \ QualityOrderedNode, RL_State, DRILLSearchTreePriorityQueue, EvaluatedConcept from ontolearn.utils import oplogging, create_experiment_folder @@ -41,7 +42,9 @@ from ontolearn.value_splitter import AbstractValueSplitter, BinningValueSplitter, EntropyValueSplitter from ontolearn.base_nces import BaseNCES from ontolearn.nces_architectures import LSTM, GRU, SetTransformer +from ontolearn.clip_architectures import LengthLearner_LSTM, LengthLearner_GRU, LengthLearner_CNN, LengthLearner_SetTransformer from ontolearn.nces_trainer import NCESTrainer, before_pad +from ontolearn.clip_trainer import CLIPTrainer from ontolearn.nces_utils import SimpleSolution from owlapy.model import OWLClassExpression, OWLDataProperty, OWLLiteral, OWLNamedIndividual, OWLReasoner, OWLClass from owlapy.render import DLSyntaxObjectRenderer @@ -1061,6 +1064,271 @@ def clean(self): pass self._cache.clear() super().clean() + + +class CLIP(CELOE): + """Concept Learner with Integrated Length Prediction. + This algorithm extends the CELOE algorithm by using concept length predictors and a different refinement operator, i.e., ExpressRefinement + + Attributes: + best_descriptions (EvaluatedDescriptionSet[OENode, QualityOrderedNode]): Best hypotheses ordered. + best_only (bool): If False pick only nodes with quality < 1.0, else pick without quality restrictions. 
+ calculate_min_max (bool): Calculate minimum and maximum horizontal expansion? Statistical purpose only. + heuristic_func (AbstractHeuristic): Function to guide the search heuristic. + heuristic_queue (SortedSet[OENode]): A sorted set that compares the nodes based on Heuristic. + iter_bound (int): Limit to stop the algorithm after n refinement steps are done. + kb (KnowledgeBase): The knowledge base that the concept learner is using. + max_child_length (int): Limit the length of concepts generated by the refinement operator. + max_he (int): Maximal value of horizontal expansion. + max_num_of_concepts_tested (int) Limit to stop the algorithm after n concepts tested. + max_runtime (int): Limit to stop the algorithm after n seconds. + min_he (int): Minimal value of horizontal expansion. + name (str): Name of the model = 'celoe_python'. + _number_of_tested_concepts (int): Yes, you got it. This stores the number of tested concepts. + operator (BaseRefinement): Operator used to generate refinements. + quality_func (AbstractScorer) The quality function to be used. + reasoner (OWLReasoner): The reasoner that this model is using. + search_tree (Dict[OWLClassExpression, TreeNode[OENode]]): Dict to store the TreeNode for a class expression. + start_class (OWLClassExpression): The starting class expression for the refinement operation. + start_time (float): The time when :meth:`fit` starts the execution. Used to calculate the total time :meth:`fit` + takes to execute. + terminate_on_goal (bool): Whether to stop the algorithm if a perfect solution is found. 
+ + """ + __slots__ = 'best_descriptions', 'max_he', 'min_he', 'best_only', 'calculate_min_max', 'heuristic_queue', \ + 'search_tree', '_learning_problem', '_max_runtime', '_seen_norm_concepts', 'predictor_name', 'pretrained_predictor_name', \ + 'load_pretrained', 'output_size', 'num_examples', 'path_of_embeddings', 'instance_embeddings', 'input_size', 'device', 'length_predictor', \ + 'num_workers', 'knowledge_base_path' + + name = 'clip' + def __init__(self, + knowledge_base: KnowledgeBase, + knowledge_base_path = '', + reasoner: Optional[OWLReasoner] = None, + refinement_operator: Optional[BaseRefinement[OENode]] = ExpressRefinement, + quality_func: Optional[AbstractScorer] = None, + heuristic_func: Optional[AbstractHeuristic] = None, + terminate_on_goal: Optional[bool] = None, + iter_bound: Optional[int] = None, + max_num_of_concepts_tested: Optional[int] = None, + max_runtime: Optional[int] = None, + max_results: int = 10, + best_only: bool = False, + calculate_min_max: bool = True, + path_of_embeddings="", + predictor_name = None, + pretrained_predictor_name = ["SetTransformer", "LSTM", "GRU", "CNN"], + load_pretrained = False, + num_workers = 4, + num_examples = 1000, + output_size = 15 + ): + super().__init__(knowledge_base, + reasoner, + refinement_operator, + quality_func, + heuristic_func, + terminate_on_goal, + iter_bound, + max_num_of_concepts_tested, + max_runtime, + max_results, + best_only, + calculate_min_max) + assert hasattr(refinement_operator, "expressivity"), f"CLIP was developed to run more efficiently with ExpressRefinement, not {refinement_operator}" + self.predictor_name = predictor_name + self.pretrained_predictor_name = pretrained_predictor_name + self.knowledge_base_path = knowledge_base_path + self.load_pretrained = load_pretrained + self.num_workers = num_workers + self.output_size = output_size + self.num_examples = num_examples + self.path_of_embeddings = path_of_embeddings + assert os.path.isfile(self.path_of_embeddings), '!!! 
Wrong path for CLIP embeddings' + self.instance_embeddings = pd.read_csv(path_of_embeddings, index_col=0) + self.input_size = self.instance_embeddings.shape[1] + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.length_predictor = self.get_length_predictor() + + def get_length_predictor(self): + def load_model(predictor_name, load_pretrained): + if predictor_name is None: + return [] + if predictor_name == 'SetTransformer': + model = LengthLearner_SetTransformer(self.input_size, self.output_size, proj_dim=256, num_heads=4, num_seeds=1, num_inds=32) + elif predictor_name == 'GRU': + model = LengthLearner_GRU(self.input_size, self.output_size, proj_dim=256, rnn_n_layers=2, drop_prob=0.2) + elif predictor_name == 'LSTM': + model = LengthLearner_LSTM(self.input_size, self.output_size, proj_dim=256, rnn_n_layers=2, drop_prob=0.2) + elif predictor_name == 'CNN': + model = LengthLearner_CNN(self.input_size, self.output_size, self.num_examples, proj_dim=256, kernel_size=[[5,7], [5,7]], stride=[[3,3], [3,3]]) + pretrained_model_path = self.path_of_embeddings.split("embeddings")[0] + "trained_models/trained_" + predictor_name + ".pt" + if load_pretrained and os.path.isfile(pretrained_model_path): + model.load_state_dict(torch.load(pretrained_model_path, map_location=self.device)) + model.eval() + print("\n Loaded length predictor!") + return model + + if not self.load_pretrained: + return [load_model(self.predictor_name, self.load_pretrained)] + elif self.load_pretrained and isinstance(self.pretrained_predictor_name, str): + return [load_model(self.pretrained_predictor_name, self.load_pretrained)] + elif self.load_pretrained and isinstance(self.pretrained_predictor_name, list): + return [load_model(name, self.load_pretrained) for name in self.pretrained_predictor_name] + + def refresh(self): + self.length_predictor = self.get_length_predictor() + + def collate_batch(self, batch): + pos_emb_list = [] + neg_emb_list = [] + target_labels = [] + 
for pos_emb, neg_emb, label in batch: + if pos_emb.ndim != 2: + pos_emb = pos_emb.reshape(1, -1) + if neg_emb.ndim != 2: + neg_emb = neg_emb.reshape(1, -1) + pos_emb_list.append(pos_emb) + neg_emb_list.append(neg_emb) + target_labels.append(label) + pos_emb_list[0] = F.pad(pos_emb_list[0], (0, 0, 0, self.num_examples - pos_emb_list[0].shape[0]), "constant", 0) + pos_emb_list = pad_sequence(pos_emb_list, batch_first=True, padding_value=0) + neg_emb_list[0] = F.pad(neg_emb_list[0], (0, 0, 0, self.num_examples - neg_emb_list[0].shape[0]), "constant", 0) + neg_emb_list = pad_sequence(neg_emb_list, batch_first=True, padding_value=0) + return pos_emb_list, neg_emb_list, torch.LongTensor(target_labels) + + def collate_batch_inference(self, batch): + pos_emb_list = [] + neg_emb_list = [] + for pos_emb, neg_emb in batch: + if pos_emb.ndim != 2: + pos_emb = pos_emb.reshape(1, -1) + if neg_emb.ndim != 2: + neg_emb = neg_emb.reshape(1, -1) + pos_emb_list.append(pos_emb) + neg_emb_list.append(neg_emb) + pos_emb_list[0] = F.pad(pos_emb_list[0], (0, 0, 0, self.num_examples - pos_emb_list[0].shape[0]), "constant", 0) + pos_emb_list = pad_sequence(pos_emb_list, batch_first=True, padding_value=0) + neg_emb_list[0] = F.pad(neg_emb_list[0], (0, 0, 0, self.num_examples - neg_emb_list[0].shape[0]), "constant", 0) + neg_emb_list = pad_sequence(neg_emb_list, batch_first=True, padding_value=0) + return pos_emb_list, neg_emb_list + + def pos_neg_to_tensor(self, pos: Union[Set[OWLNamedIndividual]], neg: Union[Set[OWLNamedIndividual], Set[str]]): + if isinstance(pos[0], OWLNamedIndividual): + pos_str = [ind.get_iri().as_str().split("/")[-1] for ind in pos][:self.num_examples] + neg_str = [ind.get_iri().as_str().split("/")[-1] for ind in neg][:self.num_examples] + elif isinstance(pos[0], str): + pos_str = pos[:self.num_examples] + neg_str = neg[:self.num_examples] + else: + raise ValueError(f"Invalid input type, was expecting OWLNamedIndividual or str but found {type(pos[0])}") + + assert 
self.load_pretrained and self.pretrained_predictor_name, \ + "No pretrained model found. Please first train length predictors, see the <> method below" + + dataset = CLIPDataLoaderInference([("", pos_str, neg_str)], self.instance_embeddings, False, False) + dataloader = DataLoader(dataset, batch_size=1, num_workers=self.num_workers, + collate_fn=self.collate_batch_inference, shuffle=False) + x_pos, x_neg = next(iter(dataloader)) + return x_pos, x_neg + + def predict_length(self, models, x1, x2): + for i, model in enumerate(models): + model.eval() + model.to(self.device) + x1 = x1.to(self.device) + x2 = x2.to(self.device) + if i == 0: + scores = model(x1, x2) + else: + sc = model(x1, x2) + scores = scores + sc + scores = scores / len(models) + prediction = int(scores.argmax(1).cpu()) + print(f"\n***** Predicted length: {prediction} *****\n") + return prediction + + def fit(self, *args, **kwargs): + """ + Find hypotheses that explain pos and neg. + """ + self.clean() + max_runtime = kwargs.pop("max_runtime", None) + learning_problem = self.construct_learning_problem(PosNegLPStandard, args, kwargs) + + assert not self.search_tree + self._learning_problem = learning_problem.encode_kb(self.kb) + + if max_runtime is not None: + self._max_runtime = max_runtime + else: + self._max_runtime = self.max_runtime + + if (self.pretrained_predictor_name is not None) and (self.length_predictor is not None): + x_pos, x_neg = self.pos_neg_to_tensor(list(self._learning_problem.kb_pos)[:self.num_examples], list(self._learning_problem.kb_neg)[:self.num_examples]) + max_length = self.predict_length(self.length_predictor, x_pos, x_neg) + self.operator.max_child_length = max_length + print(f'***** Predicted length: {max_length} *****') + else: + print('\n!!! 
No length predictor provided, running CLIP without length predictor !!!') + + + root = self.make_node(_concept_operand_sorter.sort(self.start_class), is_root=True) + self._add_node(root, None) + assert len(self.heuristic_queue) == 1 + # TODO:CD:suggest to add another assert,e.g. assert #. of instance in root > 1 + + self.start_time = time.time() + for j in range(1, self.iter_bound): + most_promising = self.next_node_to_expand(j) + tree_parent = self.tree_node(most_promising) + minimum_length = most_promising.h_exp + if logger.isEnabledFor(oplogging.TRACE): + logger.debug("now refining %s", most_promising) + for ref in self.downward_refinement(most_promising): + # we ignore all refinements with lower length + # (this also avoids duplicate node children) + # TODO: ignore too high depth + if ref.len < minimum_length: + # ignoring refinement, it does not satisfy minimum_length condition + continue + + # note: tree_parent has to be equal to node_tree_parent(ref.parent_node)! + added = self._add_node(ref, tree_parent) + + goal_found = added and ref.quality == 1.0 + + if goal_found and self.terminate_on_goal: + return self.terminate() + + if self.calculate_min_max: + # This is purely a statistical function, it does not influence CELOE + self.update_min_max_horiz_exp(most_promising) + + if time.time() - self.start_time > self._max_runtime: + return self.terminate() + + if self.number_of_tested_concepts >= self.max_num_of_concepts_tested: + return self.terminate() + + if logger.isEnabledFor(oplogging.TRACE) and j % 100 == 0: + self._log_current_best(j) + + return self.terminate() + + def train(self, data: Iterable[List[Tuple]], epochs=300, batch_size=256, learning_rate=1e-3, decay_rate=0.0, + clip_value=5.0, save_model=True, storage_path=None, optimizer='Adam', record_runtime=True, + example_sizes=None, shuffle_examples=False): + train_dataset = CLIPDataLoader(data, self.instance_embeddings, shuffle_examples=shuffle_examples, example_sizes=example_sizes) + train_dataloader 
= DataLoader(train_dataset, batch_size=batch_size, num_workers=self.num_workers, + collate_fn=self.collate_batch, shuffle=True) + if storage_path is None: + storage_path = self.knowledge_base_path[:self.knowledge_base_path.rfind("/")] + elif not os.path.exists(storage_path): + os.mkdir(storage_path) + trainer = CLIPTrainer(self, epochs=epochs, learning_rate=learning_rate, decay_rate=decay_rate, + clip_value=clip_value, storage_path=storage_path) + trainer.train(train_dataloader, save_model, optimizer, record_runtime) class NCES(BaseNCES): @@ -1110,7 +1378,7 @@ def load_model(learner_name, load_pretrained): 0] + "trained_models/trained_" + learner_name + ".pt" model.load_state_dict(torch.load(model_path, map_location=self.device)) model.eval() - print("\n\n Loaded pretrained model! \n") + print("\n Loaded synthesizer model!") return model if not self.load_pretrained: diff --git a/ontolearn/data_struct.py b/ontolearn/data_struct.py index f956794d..84b19cba 100644 --- a/ontolearn/data_struct.py +++ b/ontolearn/data_struct.py @@ -3,6 +3,7 @@ import torch from collections import deque import pandas as pd +import numpy as np import random @@ -122,7 +123,7 @@ def clear(self): self.rewards.clear() -class BaseDataLoader: +class NCESBaseDataLoader: def __init__(self, vocab, inv_vocab): @@ -154,7 +155,7 @@ def get_labels(self, target): return labels, len(target) -class NCESDataLoader(BaseDataLoader, torch.utils.data.Dataset): +class NCESDataLoader(NCESBaseDataLoader, torch.utils.data.Dataset): def __init__(self, data: list, embeddings, vocab, inv_vocab, shuffle_examples, max_length, example_sizes=None, sorted_examples=True): @@ -190,7 +191,7 @@ def __getitem__(self, idx): self.max_length - length)]).long() -class NCESDataLoaderInference(BaseDataLoader, torch.utils.data.Dataset): +class NCESDataLoaderInference(NCESBaseDataLoader, torch.utils.data.Dataset): def __init__(self, data: list, embeddings, vocab, inv_vocab, shuffle_examples, sorted_examples=True): self.data_raw = data 
@@ -209,6 +210,77 @@ def __getitem__(self, idx): elif self.shuffle_examples: random.shuffle(pos) random.shuffle(neg) - datapoint_pos = torch.FloatTensor(self.embeddings.loc[pos].values) - datapoint_neg = torch.FloatTensor(self.embeddings.loc[neg].values) + datapoint_pos = torch.FloatTensor(self.embeddings.loc[pos].values.squeeze()) + datapoint_neg = torch.FloatTensor(self.embeddings.loc[neg].values.squeeze()) return datapoint_pos, datapoint_neg + + +class CLIPDataLoader(torch.utils.data.Dataset): + + def __init__(self, data: list, embeddings, shuffle_examples, example_sizes: list=None, + k=5, sorted_examples=True): + self.data_raw = data + self.embeddings = embeddings + super().__init__() + self.shuffle_examples = shuffle_examples + self.example_sizes = example_sizes + self.k = k + self.sorted_examples = sorted_examples + + def __len__(self): + return len(self.data_raw) + + def __getitem__(self, idx): + key, value = self.data_raw[idx] + pos = value['positive examples'] + neg = value['negative examples'] + length = value['length'] + if self.example_sizes is not None: + k_pos, k_neg = random.choice(self.example_sizes) + k_pos = min(k_pos, len(pos)) + k_neg = min(k_neg, len(neg)) + selected_pos = random.sample(pos, k_pos) + selected_neg = random.sample(neg, k_neg) + elif self.k is not None: + prob_pos_set = 1.0/(1+np.array(range(min(self.k, len(pos)), len(pos)+1, self.k))) + prob_pos_set = prob_pos_set/prob_pos_set.sum() + prob_neg_set = 1.0/(1+np.array(range(min(self.k, len(neg)), len(neg)+1, self.k))) + prob_neg_set = prob_neg_set/prob_neg_set.sum() + k_pos = np.random.choice(range(min(self.k, len(pos)), len(pos)+1, self.k), replace=False, p=prob_pos_set) + k_neg = np.random.choice(range(min(self.k, len(neg)), len(neg)+1, self.k), replace=False, p=prob_neg_set) + selected_pos = random.sample(pos, k_pos) + selected_neg = random.sample(neg, k_neg) + else: + selected_pos = pos + selected_neg = neg + if self.shuffle_examples: + random.shuffle(selected_pos) + 
random.shuffle(selected_neg) + datapoint_pos = torch.FloatTensor(self.embeddings.loc[selected_pos].values.squeeze()) + datapoint_neg = torch.FloatTensor(self.embeddings.loc[selected_neg].values.squeeze()) + return datapoint_pos, datapoint_neg, torch.LongTensor([length]) + + +class CLIPDataLoaderInference(torch.utils.data.Dataset): + + def __init__(self, data: list, embeddings, shuffle_examples, + sorted_examples=True): + self.data_raw = data + self.embeddings = embeddings + super().__init__() + self.shuffle_examples = shuffle_examples + self.sorted_examples = sorted_examples + + def __len__(self): + return len(self.data_raw) + + def __getitem__(self, idx): + _, pos, neg = self.data_raw[idx] + if self.sorted_examples: + pos, neg = sorted(pos), sorted(neg) + elif self.shuffle_examples: + random.shuffle(pos) + random.shuffle(neg) + datapoint_pos = torch.FloatTensor(self.embeddings.loc[pos].values.squeeze()) + datapoint_neg = torch.FloatTensor(self.embeddings.loc[neg].values.squeeze()) + return datapoint_pos, datapoint_neg \ No newline at end of file diff --git a/ontolearn/nces_trainer.py b/ontolearn/nces_trainer.py index 547eca4d..a57eeded 100644 --- a/ontolearn/nces_trainer.py +++ b/ontolearn/nces_trainer.py @@ -6,7 +6,7 @@ from collections import defaultdict import os import json -from ontolearn.data_struct import BaseDataLoader +from ontolearn.data_struct import NCESBaseDataLoader from torch.optim.lr_scheduler import ExponentialLR from torch.nn import functional as F from torch.nn.utils import clip_grad_value_ @@ -43,11 +43,11 @@ def soft(arg1, arg2): arg1_ = arg1 arg2_ = arg2 if isinstance(arg1_, str): - arg1_ = set(before_pad(BaseDataLoader.decompose(arg1_))) + arg1_ = set(before_pad(NCESBaseDataLoader.decompose(arg1_))) else: arg1_ = set(before_pad(arg1_)) if isinstance(arg2_, str): - arg2_ = set(before_pad(BaseDataLoader.decompose(arg2_))) + arg2_ = set(before_pad(NCESBaseDataLoader.decompose(arg2_))) else: arg2_ = set(before_pad(arg2_)) return 
100*float(len(arg1_.intersection(arg2_)))/len(arg1_.union(arg2_)) @@ -56,11 +56,11 @@ def hard(arg1, arg2): arg1_ = arg1 arg2_ = arg2 if isinstance(arg1_, str): - arg1_ = before_pad(BaseDataLoader.decompose(arg1_)) + arg1_ = before_pad(NCESBaseDataLoader.decompose(arg1_)) else: arg1_ = before_pad(arg1_) if isinstance(arg2_, str): - arg2_ = before_pad(BaseDataLoader.decompose(arg2_)) + arg2_ = before_pad(NCESBaseDataLoader.decompose(arg2_)) else: arg2_ = before_pad(arg2_) return 100*float(sum(map(lambda x, y: x == y, arg1_, arg2_)))/max(len(arg1_), len(arg2_)) From 864528cb6cdcac8038781fa536e2abbf7b13819f Mon Sep 17 00:00:00 2001 From: Jean-KOUAGOU Date: Mon, 4 Mar 2024 17:21:53 +0100 Subject: [PATCH 7/8] update documentation --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1c78cae1..03150771 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Learning algorithms: - **NCES2** → (soon) [Neural Class Expression Synthesis in ALCHIQ(D)](https://papers.dice-research.org/2023/ECML_NCES2/NCES2_public.pdf) - **NCES** → [Neural Class Expression Synthesis](https://link.springer.com/chapter/10.1007/978-3-031-33455-9_13) - **NERO** → (soon) [Learning Permutation-Invariant Embeddings for Description Logic Concepts](https://link.springer.com/chapter/10.1007/978-3-031-30047-9_9) -- **CLIP** → (soon) [Learning Concept Lengths Accelerates Concept Learning in ALC](https://link.springer.com/chapter/10.1007/978-3-031-06981-9_14) +- **CLIP** → [Learning Concept Lengths Accelerates Concept Learning in ALC](https://link.springer.com/chapter/10.1007/978-3-031-06981-9_14) - **CELOE** → [Class Expression Learning for Ontology Engineering](https://www.sciencedirect.com/science/article/abs/pii/S1570826811000023) - **OCEL** → A limited version of CELOE From f9f9b0932bee2b176a5bb22abce330cc894240f5 Mon Sep 17 00:00:00 2001 From: Jean-KOUAGOU Date: Mon, 4 Mar 2024 19:28:20 +0100 Subject: [PATCH 8/8] update documentation --- README.md | 
4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 03150771..f2008dd9 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ Note that F1 scores denote the quality of the find/constructed concept w.r.t. E^ ### Mutagenesis Benchmark Results ```shell -python examples/concept_learning_evaluation.py --lps LPs/Mutagenesis/lps.json --kb KGs/Mutagenesis/mutagenesis.owl --max_runtime 60 --report mutagenesis_results.csv && python -c 'import pandas as pd; print(pd.read_csv("mutagenesis_results.csv", index_col=0).to_markdown(floatfmt=".3f"))' +python examples/concept_learning_cv_evaluation.py --path_of_nces_embeddings NCESData/mutagenesis/embeddings/ConEx_entity_embeddings.csv --path_of_clip_embeddings CLIPData/mutagenesis/embeddings/ConEx_entity_embeddings.csv --folds 10 --kb KGs/Mutagenesis/mutagenesis.owl --lps LPs/Mutagenesis/lps.json --max_runtime 60 --report mutagenesis_results.csv && python -c 'import pandas as pd; print(pd.read_csv("mutagenesis_results.csv", index_col=0).to_markdown(floatfmt=".3f"))' ``` | LP | Train-F1-OCEL | Test-F1-OCEL | RT-OCEL | Train-F1-CELOE | Test-F1-CELOE | RT-CELOE | Train-F1-Evo | Test-F1-Evo | RT-Evo | Train-F1-DRILL | Test-F1-DRILL | RT-DRILL | Train-F1-TDL | Test-F1-TDL | RT-TDL | Train-F1-NCES | Test-F1-NCES | RT-NCES | Train-F1-CLIP | Test-F1-CLIP | RT-CLIP | |:---------|----------------:|---------------:|----------:|-----------------:|----------------:|-----------:|---------------:|--------------:|---------:|-----------------:|----------------:|-----------:|---------------:|--------------:|---------:|----------------:|---------------:|----------:|----------------:|---------------:|----------:| @@ -141,7 +141,7 @@ python examples/concept_learning_evaluation.py --lps LPs/Mutagenesis/lps.json -- ### Carcinogenesis Benchmark Results ```shell -python examples/concept_learning_evaluation.py --lps LPs/Carcinogenesis/lps.json --kb KGs/Carcinogenesis/carcinogenesis.owl --max_runtime 60 --report 
carcinogenesis_results.csv && python -c 'import pandas as pd; print(pd.read_csv("carcinogenesis_results.csv", index_col=0).to_markdown(floatfmt=".3f"))' +python examples/concept_learning_cv_evaluation.py --path_of_nces_embeddings NCESData/carcinogenesis/embeddings/ConEx_entity_embeddings.csv --path_of_clip_embeddings CLIPData/carcinogenesis/embeddings/ConEx_entity_embeddings.csv --folds 10 --kb KGs/Carcinogenesis/carcinogenesis.owl --lps LPs/Carcinogenesis/lps.json --max_runtime 60 --report carcinogenesis_results.csv && python -c 'import pandas as pd; print(pd.read_csv("carcinogenesis_results.csv", index_col=0).to_markdown(floatfmt=".3f"))' ``` | LP | Train-F1-OCEL | Test-F1-OCEL | RT-OCEL | Train-F1-CELOE | Test-F1-CELOE | RT-CELOE | Train-F1-Evo | Test-F1-Evo | RT-Evo | Train-F1-DRILL | Test-F1-DRILL | RT-DRILL | Train-F1-TDL | Test-F1-TDL | RT-TDL | Train-F1-NCES | Test-F1-NCES | RT-NCES | Train-F1-CLIP | Test-F1-CLIP | RT-CLIP |