dice-group
diff --git a/‎README.md
Lines changed: 29 additions & 29 deletions b/‎README.md
Lines changed: 29 additions & 29 deletions
diff --git a/‎docs/usage/01_introduction.md
Lines changed: 1 addition & 1 deletion b/‎docs/usage/01_introduction.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/clip_notebook.ipynb
Lines changed: 234 additions & 0 deletions b/‎examples/clip_notebook.ipynb
Lines changed: 234 additions & 0 deletions
diff --git a/‎examples/concept_learning_cv_evaluation.py
Lines changed: 51 additions & 13 deletions b/‎examples/concept_learning_cv_evaluation.py
Lines changed: 51 additions & 13 deletions
diff --git a/‎examples/example_knowledge_base.py
Lines changed: 1 addition & 1 deletion b/‎examples/example_knowledge_base.py
Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 # Ontolearn
 
-**Version:** ontolearn 0.6.1
+**Version:** ontolearn 0.7.0
 
 **GitHub repository:** [https://github.com/dice-group/Ontolearn](https://github.com/dice-group/Ontolearn)
 
 
@@ -0,0 +1,234 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "blond-letter",
+   "metadata": {},
+   "source": [
+    "# CLIP Notebook\n",
+    "This is a Jupyter notebook that runs [CLIP](ontolearn.concept_learner.CLIP) and generates predictive results. We recommend reading the [concept learners](../docs/usage/06_concept_learners.md) guide before continuing with the execution.\n",
+    "Also, if you have not done so already, run the dataset commands mentioned [here](https://ontolearn-docs-dice-group.netlify.app/usage/02_installation#download-external-files) from the main directory \"Ontolearn\" to download the datasets."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "japanese-ivory",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Warning: SQLite3 version 3.40.0 and 3.41.2 have huge performance regressions; please install version 3.41.1 or 3.42!\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "import json\n",
+    "from ontolearn.knowledge_base import KnowledgeBase\n",
+    "from ontolearn.concept_learner import CLIP\n",
+    "from ontolearn.refinement_operators import ExpressRefinement\n",
+    "from ontolearn.learning_problem import PosNegLPStandard\n",
+    "from owlapy.model import OWLNamedIndividual, IRI\n",
+    "from ontolearn.utils import setup_logging\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "pending-coast",
+   "metadata": {},
+   "source": [
+    "Open `uncle_lp.json` where we have stored the learning problem for the concept of 'Uncle' and the path to the 'family' ontology."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "beginning-syntax",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "with open('uncle_lp.json') as json_file:\n",
+    "    settings = json.load(json_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "humanitarian-heating",
+   "metadata": {},
+   "source": [
+    "Create an instance of the class `KnowledgeBase` by using the path that is stored in `settings`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "caroline-indiana",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "kb = KnowledgeBase(path=settings['data_path'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "lucky-activation",
+   "metadata": {},
+   "source": [
+    "Retrieve the IRIs of the positive and negative examples of Uncle from `settings` and create an instance of `PosNegLPStandard`. (more info about this [here](../docs/usage/06_concept_learners.md#configure-the-learning-problem))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "processed-patrick",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "examples = settings['Uncle']\n",
+    "p = set(examples['positive_examples'])\n",
+    "n = set(examples['negative_examples'])\n",
+    "typed_pos = set(map(OWLNamedIndividual, map(IRI.create, p)))\n",
+    "typed_neg = set(map(OWLNamedIndividual, map(IRI.create, n)))\n",
+    "lp = PosNegLPStandard(pos=typed_pos, neg=typed_neg)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "mechanical-latin",
+   "metadata": {},
+   "source": [
+    "Create a model of [CLIP](ontolearn.concept_learner.CLIP) and fit the learning problem to the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "171d1aa4-6c12-42c0-b7e9-8cf2dce85ff9",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "op = ExpressRefinement(knowledge_base=kb, use_inverse=False,\n",
+    "                          use_numeric_datatypes=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "binding-moderator",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      " Loaded length predictor!\n",
+      "\n",
+      " Loaded length predictor!\n",
+      "\n",
+      " Loaded length predictor!\n",
+      "\n",
+      " Loaded length predictor!\n",
+      "\n",
+      "***** Predicted length: 5 *****\n",
+      "\n",
+      "***** Predicted length: 5 *****\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<ontolearn.concept_learner.CLIP at 0x7f762ae039a0>"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = CLIP(knowledge_base=kb, path_of_embeddings=\"../CLIPData/family/embeddings/ConEx_entity_embeddings.csv\",\n",
+    "             refinement_operator=op, load_pretrained=True, max_runtime=200)\n",
+    "model.fit(lp)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d981f2b9-3489-494e-825d-6a72ee480d4f",
+   "metadata": {},
+   "source": [
+    "## Retrieve top 3 hypotheses and print them."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "c6a90b21-3594-441d-bed0-eb822db5f993",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'ontolearn.search.OENode'> at 0x0304774\tMale ⊓ (∀ hasParent.Grandparent)\tQuality:0.90476\tHeuristic:0.40407\tDepth:2\tH_exp:6\t|RC|:7\t|Indv.|:None\n",
+      "<class 'ontolearn.search.OENode'> at 0x0ca154a\tMale ⊓ (∀ hasChild.Grandchild)\tQuality:0.90476\tHeuristic:0.36919\tDepth:1\tH_exp:7\t|RC|:7\t|Indv.|:None\n",
+      "<class 'ontolearn.search.OENode'> at 0x2adbb89\tMale ⊓ (∀ hasChild.(¬Grandfather))\tQuality:0.88889\tHeuristic:0.39044\tDepth:3\tH_exp:6\t|RC|:0\t|Indv.|:None\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[None, None, None]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "hypotheses = list(model.best_hypotheses(n=3))\n",
+    "[print(_) for _ in hypotheses]"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "onto",
+   "language": "python",
+   "name": "onto"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
@@ -12,7 +12,8 @@
 import time
 import pandas as pd
 from ontolearn.knowledge_base import KnowledgeBase
-from ontolearn.concept_learner import CELOE, OCEL, EvoLearner, NCES
+from ontolearn.concept_learner import CELOE, OCEL, EvoLearner, NCES, CLIP
+from ontolearn.refinement_operators import ExpressRefinement
 from ontolearn.learners import Drill, TDL
 from ontolearn.learning_problem import PosNegLPStandard
 from ontolearn.metrics import F1
@@ -32,27 +33,41 @@ def dl_concept_learning(args):
         settings = json.load(json_file)
 
     kb = KnowledgeBase(path=args.kb)
-    ocel = OCEL(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(),
+    ocel = OCEL(knowledge_base=kb, quality_func=F1(),
                 max_runtime=args.max_runtime)
-    celoe = CELOE(knowledge_base=KnowledgeBase(path=args.kb), quality_func=F1(),
+    celoe = CELOE(knowledge_base=kb, quality_func=F1(),
                   max_runtime=args.max_runtime)
-    drill = Drill(knowledge_base=KnowledgeBase(path=args.kb), path_pretrained_kge=args.path_pretrained_kge,
+    drill = Drill(knowledge_base=kb, path_pretrained_kge=args.path_pretrained_kge,
                   quality_func=F1(), max_runtime=args.max_runtime)
-    tdl = TDL(knowledge_base=KnowledgeBase(path=args.kb),
+    tdl = TDL(knowledge_base=kb,
               dataframe_triples=pd.DataFrame(
                   data=sorted([(str(s), str(p), str(o)) for s, p, o in Graph().parse(args.kb)], key=lambda x: len(x)),
                   columns=['subject', 'relation', 'object'], dtype=str),
               kwargs_classifier={"random_state": 0},
               max_runtime=args.max_runtime)
     nces = NCES(knowledge_base_path=args.kb, quality_func=F1(), path_of_embeddings=args.path_of_nces_embeddings,
                 pretrained_model_name=["LSTM", "GRU", "SetTransformer"], num_predictions=5)
+    
+    express_rho = ExpressRefinement(kb, use_inverse=False, use_numeric_datatypes=False)
+    clip = CLIP(knowledge_base=kb, refinement_operator=express_rho, quality_func=F1(), 
+                max_num_of_concepts_tested=int(1e9), max_runtime=args.max_runtime, 
+                path_of_embeddings=args.path_of_clip_embeddings,
+                pretrained_predictor_name=["LSTM", "GRU", "SetTransformer", "CNN"], load_pretrained=True)
 
     # dictionary to store the data
     data = dict()
-    for str_target_concept, examples in settings['problems'].items():
+    if "problems" in settings:
+        problems = settings['problems'].items()
+        positives_key = "positive_examples"
+        negatives_key = "negative_examples"
+    else:
+        problems = settings.items()
+        positives_key = "positive examples"
+        negatives_key = "negative examples"
+    for str_target_concept, examples in problems:
         print('Target concept: ', str_target_concept)
-        p = examples['positive_examples']
-        n = examples['negative_examples']
+        p = examples[positives_key]
+        n = examples[negatives_key]
 
         kf = StratifiedKFold(n_splits=args.folds, shuffle=True, random_state=args.random_seed)
         X = np.array(p + n)
@@ -67,16 +82,16 @@ def dl_concept_learning(args):
             train_neg = {neg_individual for neg_individual in X[train_index][y[train_index] == 0]}
 
             # Sanity checking for individuals used for training.
-            assert train_pos.issubset(examples['positive_examples'])
-            assert train_neg.issubset(examples['negative_examples'])
+            assert train_pos.issubset(examples[positives_key])
+            assert train_neg.issubset(examples[negatives_key])
 
             # () Extract positive and negative examples from test fold
             test_pos = {pos_individual for pos_individual in X[test_index][y[test_index] == 1]}
             test_neg = {neg_individual for neg_individual in X[test_index][y[test_index] == 0]}
 
             # Sanity checking for individuals used for testing.
-            assert test_pos.issubset(examples['positive_examples'])
-            assert test_neg.issubset(examples['negative_examples'])
+            assert test_pos.issubset(examples[positives_key])
+            assert test_neg.issubset(examples[negatives_key])
             train_lp = PosNegLPStandard(pos=set(map(OWLNamedIndividual, map(IRI.create, train_pos))),
                                         neg=set(map(OWLNamedIndividual, map(IRI.create, train_neg))))
 
@@ -217,6 +232,28 @@ def dl_concept_learning(args):
             print(f"NCES Train Quality: {train_f1_nces:.3f}", end="\t")
             print(f"NCES Test Quality: {test_f1_nces:.3f}", end="\t")
             print(f"NCES Runtime: {rt_nces:.3f}")
+            
+            
+            print("CLIP starts..", end="\t")
+            start_time = time.time()
+            pred_clip = clip.fit(train_lp).best_hypotheses(n=1)
+            rt_clip = time.time() - start_time
+            print("CLIP ends..", end="\t")
+            # () Quality on the training data
+            train_f1_clip = compute_f1_score(individuals={i for i in kb.individuals(pred_clip.concept)},
+                                              pos=train_lp.pos,
+                                              neg=train_lp.neg)
+            # () Quality on test data
+            test_f1_clip = compute_f1_score(individuals={i for i in kb.individuals(pred_clip.concept)},
+                                             pos=test_lp.pos,
+                                             neg=test_lp.neg)
+            
+            data.setdefault("Train-F1-CLIP", []).append(train_f1_clip)
+            data.setdefault("Test-F1-CLIP", []).append(test_f1_clip)
+            data.setdefault("RT-CLIP", []).append(rt_clip)
+            print(f"CLIP Train Quality: {train_f1_clip:.3f}", end="\t")
+            print(f"CLIP Test Quality: {test_f1_clip:.3f}", end="\t")
+            print(f"CLIP Runtime: {rt_clip:.3f}")
 
     df = pd.DataFrame.from_dict(data)
     df.to_csv(args.report, index=False)
@@ -227,12 +264,13 @@ def dl_concept_learning(args):
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Description Logic Concept Learning')
     parser.add_argument("--max_runtime", type=int, default=10, help="Max runtime")
-    parser.add_argument("--lps", type=str, required=True, help="Path fto the learning problems")
+    parser.add_argument("--lps", type=str, required=True, help="Path to the learning problems")
     parser.add_argument("--folds", type=int, default=10, help="Number of folds of cross validation.")
     parser.add_argument("--kb", type=str, required=True,
                         help="Knowledge base")
     parser.add_argument("--path_pretrained_kge", type=str, default=None)
     parser.add_argument("--path_of_nces_embeddings", type=str, default=None)
+    parser.add_argument("--path_of_clip_embeddings", type=str, default=None)
     parser.add_argument("--report", type=str, default="report.csv")
     parser.add_argument("--random_seed", type=int, default=1)
     dl_concept_learning(parser.parse_args())
@@ -38,7 +38,7 @@
 print('*' * 100)
 
 # Direct concept hierarchy from Top to Bottom.
-for concept in kb.class_hierarchy().items():
+for concept in kb.class_hierarchy.items():
     print(f'{concept.get_iri().as_str()} => {[c.get_iri().as_str() for c in kb.get_direct_sub_concepts(concept)]}')
 print('*' * 100)