diff --git a/HGCN - Hyperlib.ipynb b/HGCN - Hyperlib.ipynb new file mode 100644 index 0000000..bce73ec --- /dev/null +++ b/HGCN - Hyperlib.ipynb @@ -0,0 +1,511 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "8859cdb2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From D:\\PythonVirtualEnvironments\\hyperlib\\lib\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n", + "\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "\n", + "from hyperlib.manifold.lorentz import Lorentz\n", + "from hyperlib.manifold.poincare import Poincare\n", + "from tensorflow import keras\n", + "from tensorflow.keras.callbacks import EarlyStopping\n", + "from tensorflow.keras.losses import CategoricalCrossentropy\n", + "from tensorflow.keras.optimizers import Adam\n", + "from spektral.data import SingleLoader\n", + "from spektral.datasets.citation import Citation\n", + "from spektral.transforms import LayerPreprocess\n", + "from spektral.layers import GCNConv\n", + "from spektral.models.gcn import GCN" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d4176c3c-83c2-45f6-af9a-e240973fc729", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tf.config.experimental.list_physical_devices()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6e7438dc", + "metadata": {}, + "outputs": [], + "source": [ + "import hyperlib" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8d654d79", + "metadata": {}, + "outputs": [], + "source": [ + "from hyperlib.nn.layers.linear import ActivationHyperbolic, LinearHyperbolic\n", + "from hyperlib.nn.layers.graph import HGCLayer, HyperbolicAggregation, HGCNLP\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c1f1fcd8", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "9ebe4a61", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From D:\\PythonVirtualEnvironments\\hyperlib\\lib\\site-packages\\keras\\src\\backend.py:873: The name tf.get_default_graph is deprecated. 
Please use tf.compat.v1.get_default_graph instead.\n", + "\n" + ] + } + ], + "source": [ + "#ConvHyperbolic(16, self.manifold, self.c0, self.c1, activation=\"relu\")\n", + "hgc_layer = HGCLayer(Poincare(), 100, 0.4, \"relu\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b9421213", + "metadata": {}, + "outputs": [], + "source": [ + "from hyperlib.utils.data_utils import load_data, load_data_lp, mask_edges, process" + ] + }, + { + "cell_type": "markdown", + "id": "c22811d8", + "metadata": {}, + "source": [ + "## Load data" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "b5fa8225", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\PythonVirtualEnvironments\\hyperlib\\lib\\site-packages\\scipy\\sparse\\_index.py:145: SparseEfficiencyWarning: Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.\n", + " self._set_arrayXarray(i, j, x)\n" + ] + } + ], + "source": [ + "from spektral.datasets import citation\n", + "\n", + "dataset = citation.Cora()\n", + "graph = dataset[0]\n", + "\n", + "# Node features\n", + "X = graph.x \n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4bb7fe4d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2708" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.n_nodes" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "6e4ba242", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<2708x2708 sparse array of type ''\n", + "\twith 10556 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.a" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "8b32af73", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2708, 1433)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "949e5d23", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\Nalex\\Playgrounds\\HyperbolicPackage\\branch\\hyperlib\\hyperlib\\utils\\data_utils.py:67: UserWarning: torch.sparse.SparseTensor(indices, values, shape, *, device=) is deprecated. Please use torch.sparse_coo_tensor(indices, values, shape, dtype=, device=).
(Triggered internally at ..\\torch\\csrc\\utils\\tensor_new.cpp:620.)\n", + " return torch.sparse.FloatTensor(indices, values, shape)\n" + ] + }, + { + "data": { + "text/plain": [ + "(3664290, 3696)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = load_data_lp(\"cora\", True, \"resources\\\\data\\\\hgcn\\\\cora\")\n", + "\n", + "val_prop = 0.1\n", + "test_prop = 0.2\n", + "split_seed = 42\n", + "normalize_adj = False\n", + "normalize_feats = False\n", + "\n", + "adj = data['adj_train']\n", + "\n", + "adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false = mask_edges(\n", + " adj, val_prop, test_prop, split_seed\n", + ")\n", + "data['adj_train'] = adj_train\n", + "data['train_edges'], data['train_edges_false'] = train_edges, train_edges_false\n", + "data['val_edges'], data['val_edges_false'] = val_edges, val_edges_false\n", + "data['test_edges'], data['test_edges_false'] = test_edges, test_edges_false\n", + " \n", + "data['adj_train_norm'], data['features'] = process(\n", + " data['adj_train'], data['features'], normalize_adj, normalize_feats\n", + ")\n", + "\n", + "\n", + "data.keys()\n", + "\n", + "data[\"adj_train\"]\n", + "\n", + "data['adj_train_norm']\n", + "\n", + "data[\"features\"]\n", + "\n", + "data['features']\n", + "\n", + "n_nodes, feat_dim = data['features'].shape\n", + "\n", + "n_nodes, feat_dim\n", + "\n", + "nb_false_edges = len(data['train_edges_false'])\n", + "nb_edges = len(data['train_edges'])\n", + "nb_false_edges, nb_edges" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b8e564eb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "LinearHyperbolic(data['features'][1].shape, Lorentz(), 1.0, activation=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "dea472bb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "LinearHyperbolic(1433, Lorentz(), 1.0, activation=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "8ee576b2", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_sparse_matrix_to_sparse_tensor(X):\n", + " coo = X.tocoo()\n", + " indices = np.mat([coo.row, coo.col]).transpose()\n", + " return tf.SparseTensor(indices, coo.data, coo.shape)" + ] + }, + { + "cell_type": "markdown", + "id": "4b629088", + "metadata": {}, + "source": [ + "## Train Model" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "76fd2050", + "metadata": {}, + "outputs": [], + "source": [ + "hgcn = HGCNLP(1433)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "02e01032", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<2708x2708 sparse array of type ''\n", + "\twith 10556 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.a" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "ad9c87d9", + "metadata": {}, + "outputs": [], + "source": [ + "def convert_sparse_matrix_to_sparse_tensor(X):\n", + " coo = X.tocoo()\n", + " indices = np.mat([coo.row, coo.col]).transpose()\n", + " return tf.SparseTensor(indices, coo.data, coo.shape)\n", + "\n", + 
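"# the HGCN aggregation step uses tf.sparse.sparse_dense_matmul, which expects a tf.SparseTensor rather than a scipy sparse matrix\n", +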
"graph_tf = convert_sparse_matrix_to_sparse_tensor(graph.a)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "80b2593f-6aee-4810-af6f-392a9812a36e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<2708x2708 sparse array of type ''\n", + "\twith 10556 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "graph.a" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "2e3074ad", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0\n", + "x shape empmap0 (2708, 1433)\n", + "HGCLayer x shape (2708, 1433)\n" + ] + }, + { + "ename": "InvalidArgumentError", + "evalue": "Exception encountered when calling layer 'linear_hyperbolic_3' (type LinearHyperbolic).\n\n{{function_node __wrapped__ConcatV2_N_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} ConcatOp : Ranks of all input tensors should match: shape[0] = [2708,1] vs. shape[1] = [1,2708,1432] [Op:ConcatV2] name: concat\n\nCall arguments received by layer 'linear_hyperbolic_3' (type LinearHyperbolic):\n • inputs=tf.Tensor(shape=(2708, 1433), dtype=float32)", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mInvalidArgumentError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[21], line 4\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m epoch \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(epochs):\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEpoch \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mepoch\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m----> 4\u001b[0m \u001b[43mhgcn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgraph_tf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 5\u001b[0m \u001b[38;5;66;03m#grads = tape.gradient(loss_value, self.embedding.trainable_weights)\u001b[39;00m\n\u001b[0;32m 6\u001b[0m \u001b[38;5;66;03m#optimizer.apply_gradients(zip(grads, self.embedding.trainable_weights))\u001b[39;00m\n\u001b[0;32m 7\u001b[0m \n\u001b[0;32m 8\u001b[0m \u001b[38;5;66;03m#if step % 100 == 0:\u001b[39;00m\n\u001b[0;32m 9\u001b[0m \u001b[38;5;66;03m# log.info(\"Training loss (for one batch) at step %d: %.4f\"\u001b[39;00m\n\u001b[0;32m 10\u001b[0m \u001b[38;5;66;03m# % (step, float(loss_value)))\u001b[39;00m\n", + "File \u001b[1;32mD:\\PythonVirtualEnvironments\\hyperlib\\lib\\site-packages\\keras\\src\\utils\\traceback_utils.py:70\u001b[0m, in \u001b[0;36mfilter_traceback..error_handler\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 67\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n\u001b[0;32m 68\u001b[0m \u001b[38;5;66;03m# To get the full stack trace, call:\u001b[39;00m\n\u001b[0;32m 69\u001b[0m \u001b[38;5;66;03m# `tf.debugging.disable_traceback_filtering()`\u001b[39;00m\n\u001b[1;32m---> 70\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\u001b[38;5;241m.\u001b[39mwith_traceback(filtered_tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 71\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 72\u001b[0m 
\u001b[38;5;28;01mdel\u001b[39;00m filtered_tb\n", + "File \u001b[1;32mD:\\Nalex\\Playgrounds\\HyperbolicPackage\\branch\\hyperlib\\hyperlib\\nn\\layers\\graph.py:76\u001b[0m, in \u001b[0;36mHGCNLP.call\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m 72\u001b[0m x_hyp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmanifold\u001b[38;5;241m.\u001b[39mproj(x_hyp, c\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mc1)\n\u001b[0;32m 73\u001b[0m \u001b[38;5;66;03m# Map euclidean features to Hyperbolic space\u001b[39;00m\n\u001b[0;32m 74\u001b[0m \u001b[38;5;66;03m#x = self.manifold.expmap0(x, c=self.c_map)\u001b[39;00m\n\u001b[0;32m 75\u001b[0m \u001b[38;5;66;03m# Stack multiple hyperbolic graph convolution layers\u001b[39;00m\n\u001b[1;32m---> 76\u001b[0m x, adj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv0\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madj\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 77\u001b[0m \u001b[38;5;66;03m#x, adj = self.conv1((x, adj))\u001b[39;00m\n\u001b[0;32m 78\u001b[0m \u001b[38;5;66;03m#x, adj = self.conv2((x, adj))\u001b[39;00m\n\u001b[0;32m 79\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 91\u001b[0m \u001b[38;5;66;03m# regularization objective in node classification tasks, to encourage embeddings at the last layer to\u001b[39;00m\n\u001b[0;32m 92\u001b[0m \u001b[38;5;66;03m# preserve the graph structure\u001b[39;00m\n\u001b[0;32m 93\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n", + "File \u001b[1;32mD:\\Nalex\\Playgrounds\\HyperbolicPackage\\branch\\hyperlib\\hyperlib\\nn\\layers\\graph.py:42\u001b[0m, in \u001b[0;36mHGCLayer.call\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m 38\u001b[0m \u001b[38;5;66;03m# x = self.manifold.logmap0(x, c=self.c)\u001b[39;00m\n\u001b[0;32m 39\u001b[0m \n\u001b[0;32m 40\u001b[0m \u001b[38;5;66;03m# Step 2 (attention-based neighborhood aggregation)\u001b[39;00m\n\u001b[0;32m 41\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHGCLayer x shape\u001b[39m\u001b[38;5;124m'\u001b[39m, x\u001b[38;5;241m.\u001b[39mshape)\n\u001b[1;32m---> 42\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinear_layer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 43\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maggregation_layer((x, adj))\n\u001b[0;32m 45\u001b[0m \u001b[38;5;66;03m# Step 3 (non-linear activation with different curvatures)\u001b[39;00m\n", + "File \u001b[1;32mD:\\Nalex\\Playgrounds\\HyperbolicPackage\\branch\\hyperlib\\hyperlib\\nn\\layers\\linear.py:42\u001b[0m, in \u001b[0;36mLinearHyperbolic.call\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m 40\u001b[0m \u001b[38;5;66;03m# TODO: remove casting and instead recommend setting default tfd values to float64\u001b[39;00m\n\u001b[0;32m 41\u001b[0m inputs \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39mcast(inputs, tf\u001b[38;5;241m.\u001b[39mfloat64)\n\u001b[1;32m---> 42\u001b[0m mv \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmanifold\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmobius_matvec\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkernel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 43\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmanifold\u001b[38;5;241m.\u001b[39mproj(mv, c\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mc)\n\u001b[0;32m 44\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mLinearHyperbolic x shape\u001b[39m\u001b[38;5;124m'\u001b[39m, inputs\u001b[38;5;241m.\u001b[39mshape)\n", + "File \u001b[1;32mD:\\Nalex\\Playgrounds\\HyperbolicPackage\\branch\\hyperlib\\hyperlib\\manifold\\lorentz.py:147\u001b[0m, in \u001b[0;36mLorentz.mobius_matvec\u001b[1;34m(self, m, x, c)\u001b[0m\n\u001b[0;32m 146\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmobius_matvec\u001b[39m(\u001b[38;5;28mself\u001b[39m, m, x, c):\n\u001b[1;32m--> 147\u001b[0m u \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlogmap0\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 148\u001b[0m mu \u001b[38;5;241m=\u001b[39m u \u001b[38;5;241m@\u001b[39m m\n\u001b[0;32m 149\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexpmap0(mu, c)\n", + "File \u001b[1;32mD:\\Nalex\\Playgrounds\\HyperbolicPackage\\branch\\hyperlib\\hyperlib\\manifold\\lorentz.py:139\u001b[0m, in \u001b[0;36mLorentz.logmap0\u001b[1;34m(self, x, c)\u001b[0m\n\u001b[0;32m 136\u001b[0m res \u001b[38;5;241m=\u001b[39m sqrtK \u001b[38;5;241m*\u001b[39m arcosh(theta) \u001b[38;5;241m*\u001b[39m y \u001b[38;5;241m/\u001b[39m y_norm\n\u001b[0;32m 138\u001b[0m zeros \u001b[38;5;241m=\u001b[39m tf\u001b[38;5;241m.\u001b[39mzeros((b, \u001b[38;5;241m1\u001b[39m), dtype\u001b[38;5;241m=\u001b[39mres\u001b[38;5;241m.\u001b[39mdtype)\n\u001b[1;32m--> 139\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconcat\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mzeros\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mres\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[1;31mInvalidArgumentError\u001b[0m: Exception encountered when calling layer 'linear_hyperbolic_3' (type LinearHyperbolic).\n\n{{function_node __wrapped__ConcatV2_N_2_device_/job:localhost/replica:0/task:0/device:CPU:0}} ConcatOp : Ranks of all input tensors should match: shape[0] = [2708,1] vs. 
shape[1] = [1,2708,1432] [Op:ConcatV2] name: concat\n\nCall arguments received by layer 'linear_hyperbolic_3' (type LinearHyperbolic):\n • inputs=tf.Tensor(shape=(2708, 1433), dtype=float32)" + ] + } + ], + "source": [ + "epochs = 10\n", + "for epoch in range(epochs):\n", + " print(f\"Epoch {epoch}\")\n", + " hgcn((X, graph_tf))\n", + " #grads = tape.gradient(loss_value, self.embedding.trainable_weights)\n", + " #optimizer.apply_gradients(zip(grads, self.embedding.trainable_weights))\n", + "\n", + " #if step % 100 == 0:\n", + " # log.info(\"Training loss (for one batch) at step %d: %.4f\"\n", + " # % (step, float(loss_value)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a4663163", + "metadata": {}, + "outputs": [], + "source": [ + "epochs = 10\n", + "for epoch in range(epochs):\n", + " print(f\"Epoch {epoch}\")\n", + " hgcn((X, graph.a))\n", + " #grads = tape.gradient(loss_value, self.embedding.trainable_weights)\n", + " #optimizer.apply_gradients(zip(grads, self.embedding.trainable_weights))\n", + "\n", + " #if step % 100 == 0:\n", + " # log.info(\"Training loss (for one batch) at step %d: %.4f\"\n", + " # % (step, float(loss_value)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "030c078f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/README.md b/README.md index aec74f3..8af6893 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ Creating a hyperbolic neural network using Keras: ```python import tensorflow as tf from tensorflow import keras -from hyperlib.nn.layers.lin_hyp import LinearHyperbolic +from hyperlib.nn.layers.linear import LinearHyperbolic from hyperlib.nn.optimizers.rsgd import RSGD from hyperlib.manifold.poincare import Poincare diff --git a/hyperlib/manifold/lorentz.py b/hyperlib/manifold/lorentz.py index 6e923f1..9c493ab 100644 --- a/hyperlib/manifold/lorentz.py +++ b/hyperlib/manifold/lorentz.py @@ -7,7 +7,7 @@ class Lorentz(Manifold): """ Implementation of the Lorentz/Hyperboloid manifold defined by - :math: `L = \{ x \in R^d | -x_0^2 + x_1^2 + ... + x_d^2 = -K \}`, + :math: `L = \{ x \in R^d | -x_0^2 + x_1^2 + ... + x_d^2 = -K \}`, where c = 1 / K is the hyperbolic curvature and d is the manifold dimension. The point :math: `( \sqrt{K}, 0, \dots, 0 )` is referred to as "zero". @@ -35,7 +35,7 @@ def minkowski_norm(self, u, keepdim=True): def dist_squared(self, x, y, c): """Squared hyperbolic distance between x, y""" K = 1. / c - theta = tf.clip_by_value( -self.minkowski_dot(x, y) / K, + theta = tf.clip_by_value( -self.minkowski_dot(x, y) / K, clip_value_min=1.0 + self.eps[x.dtype], clip_value_max=self.max_norm) return K * arcosh(theta)**2 @@ -44,9 +44,9 @@ def proj(self, x, c): K = 1. 
/ c d1 = x.shape[-1] y = x[:,1:d1] - y_sqnorm = tf.math.square( + y_sqnorm = tf.math.square( tf.norm(y, ord=2, axis=1, keepdims=True)) - t = tf.clip_by_value(K + y_sqnorm, + t = tf.clip_by_value(K + y_sqnorm, clip_value_min=self.eps[x.dtype], clip_value_max=self.max_norm ) @@ -70,7 +70,7 @@ def proj_tan0(self, u, c): return tf.concat([z, ud], axis=1) def expmap(self, u, x, c): - """Maps vector u in the tangent space at x onto the manifold""" + """Maps vector u in the tangent space at x onto the manifold""" K = 1. / c sqrtK = K ** 0.5 normu = self.minkowski_norm(u) @@ -83,8 +83,8 @@ def expmap(self, u, x, c): def logmap(self, y, x, c): """Maps point y in the manifold to the tangent space at x""" K = 1. / c - xy = tf.clip_by_value(self.minkowski_dot(x, y) + K, - clip_value_min=-self.max_norm, clip_value_max=-self.eps[x.dtype]) + xy = tf.clip_by_value(self.minkowski_dot(x, y) + K, + clip_value_min=-self.max_norm, clip_value_max=-self.eps[x.dtype]) xy -= K u = y + xy * x * c normu = self.minkowski_norm(u) @@ -99,7 +99,8 @@ def hyp_act(self, act, x, c_in, c_out): return self.proj(self.expmap0(xt, c=c_out), c=c_out) def expmap0(self, u, c): - """Maps vector u in the tangent space at zero onto the manifold""" + """Maps vector u in the tangent space at zero onto the manifold""" + print('x shape expmap0', u.shape) K = 1. / c sqrtK = K ** 0.5 d = u.shape[-1] @@ -125,10 +126,16 @@ def logmap0(self, x, c): y = tf.reshape(x[:,1:], [-1, d-1]) y_norm = tf.norm(y, ord=2, axis=1, keepdims=True) y_norm = self.clip_norm(y_norm) - theta = tf.clip_by_value(x[:, 0:1] / sqrtK, - clip_value_min=1.0+self.eps[x.dtype], clip_value_max=self.max_norm) + + theta = tf.clip_by_value( + x[:, 0:1] / sqrtK, + clip_value_min=1.0+self.eps[x.dtype], + clip_value_max=self.max_norm + ) + res = sqrtK * arcosh(theta) * y / y_norm - zeros = tf.zeros((b,1), dtype=res.dtype) + + zeros = tf.zeros((b, 1), dtype=res.dtype) return tf.concat([zeros, res], axis=1) def mobius_add(self, x, y, c): @@ -138,7 +145,7 @@ def mobius_add(self, x, y, c): def mobius_matvec(self, m, x, c): u = self.logmap0(x, c) - mu = u @ m + mu = u @ m return self.expmap0(mu, c) def ptransp(self, x, y, u, c): @@ -178,3 +185,12 @@ def to_poincare(self, x, c): def clip_norm(self, x): return tf.clip_by_value(x, clip_value_min=self.min_norm, clip_value_max=self.max_norm) + + def sqdist(self, x, y, c): + """Squared hyperbolic distance, identical to dist_squared but clamped for the Fermi-Dirac decoder""" + K = 1. / c + prod = self.minkowski_dot(x, y) + # clip theta exactly as in dist_squared above + theta = tf.clip_by_value(-prod / K, clip_value_min=1.0 + self.eps[x.dtype], clip_value_max=self.max_norm) + sqdist = K * arcosh(theta) ** 2 + # clamp distance to avoid nans in the Fermi-Dirac decoder + res = tf.clip_by_value(sqdist, clip_value_min=0.0, clip_value_max=50.0) + return res diff --git a/hyperlib/manifold/poincare.py b/hyperlib/manifold/poincare.py index 1bb7da9..75b8152 100644 --- a/hyperlib/manifold/poincare.py +++ b/hyperlib/manifold/poincare.py @@ -136,7 +136,7 @@ def proj(self, x, c): projected = x / norm * maxnorm return tf.where(cond, projected, x) - def mobius_add(self, x, y, c): + def mobius_add(self, x, y, c, axis=-1): """Element-wise Mobius addition. Args: x: Tensor of size B x dimension representing hyperbolic points. y: Tensor of size B x dimension representing hyperbolic points. c: Tensor of size 1 representing the absolute hyperbolic curvature. Returns: Tensor of shape B x dimension representing the element-wise Mobius addition of x and y.
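+        The new axis argument selects the dimension along which the inner products are reduced; the default of -1 (the last axis) preserves the previous behaviour.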
""" - cx2 = c * tf.reduce_sum(x * x, axis=-1, keepdims=True) - cy2 = c * tf.reduce_sum(y * y, axis=-1, keepdims=True) - cxy = c * tf.reduce_sum(x * y, axis=-1, keepdims=True) + cx2 = c * tf.reduce_sum(x * x, axis=axis, keepdims=True) + cy2 = c * tf.reduce_sum(y * y, axis=axis, keepdims=True) + cxy = c * tf.reduce_sum(x * y, axis=axis, keepdims=True) num = (1 + 2 * cxy + cy2) * x + (1 - cx2) * y denom = 1 + 2 * cxy + cx2 * cy2 return self.proj(num / tf.maximum(denom, self.min_norm), c) @@ -174,3 +174,11 @@ def single_query_attn_scores(self, key, query, c): scores = (1. / denom) * scores return scores + def sqdist(self, p1, p2, c): + sqrt_c = c ** 0.5 + dist_c = atanh( + sqrt_c * self.mobius_add(-p1, p2, c, dim=-1) + ) + dist_c = tf.norm(dist_c, axis=-1, ord=2, keepdim=False) + dist = dist_c * 2 / sqrt_c + return dist ** 2 diff --git a/hyperlib/models/__init__.py b/hyperlib/models/__init__.py index e69de29..328a7ff 100644 --- a/hyperlib/models/__init__.py +++ b/hyperlib/models/__init__.py @@ -0,0 +1,83 @@ +import logging + +import tensorflow as tf +from tensorflow import keras + +from hyperlib.loss.constrastive_loss import contrastive_loss +from hyperlib.manifold.lorentz import Lorentz +from hyperlib.manifold.poincare import Poincare +from hyperlib.nn.layers.graph import HGCLayer + + +log = logging.getLogger(__name__) + + +class HGCN(tf.keras.Model): + """ + Hierarchical Embeddings model from Poincaré Embeddings for + Learning Hierarchical Representations by Nickel and Keila + Please find an example of how to use this model in hyperlib/examples/wordnet_embedding.py + """ + + def __init__(self, vocab, embedding_dim=2, manifold=Poincare, c=1.0, clip_value=0.9): + super().__init__() + + initializer=keras.initializers.RandomUniform(minval=-0.001, maxval=0.001, seed=None) + self.string_lookup = keras.layers.StringLookup(vocabulary=vocab, name="string_lookup") + self.embedding = keras.layers.Embedding( + len(vocab)+1, + embedding_dim, + embeddings_initializer=initializer, + name="embeddings", + ) + self.vocab = vocab + self.manifold = manifold() + self.c = c + self.clip_value = clip_value + + def call(self, inputs): + indices = self.string_lookup(inputs) + return self.embedding(indices) + + def get_embeddings(self): + embeddings = self.embedding(tf.constant([i for i in range(len(self.vocab))])) + embeddings_copy = tf.identity(embeddings) + embeddings_hyperbolic = self.manifold.expmap0(embeddings_copy, c=self.c) + return embeddings_hyperbolic + + def get_vocabulary(self): + return self.vocab + + @staticmethod + def get_model(vocab, embedding_dim=2): + embedding_dim=2 + initializer=keras.initializers.RandomUniform(minval=-0.001, maxval=0.001, seed=None) + string_lookup_layer = keras.layers.StringLookup(vocabulary=vocab) + + emb_layer = keras.layers.Embedding( + len(vocab)+1, + embedding_dim, + embeddings_initializer=initializer, + name="embeddings", + ) + + model = keras.Sequential([string_lookup_layer, emb_layer]) + return model + + def fit(self, train_dataset, optimizer, epochs=100): + + for epoch in range(epochs): + log.info("Epoch %d" % (epoch,)) + for step, (x_batch_train, y_batch_train) in enumerate(train_dataset): + with tf.GradientTape() as tape: + pos_embs = self.embedding(self.string_lookup(x_batch_train)) + neg_embs = self.embedding(self.string_lookup(y_batch_train)) + loss_value = contrastive_loss( + pos_embs, neg_embs, self.manifold, c=self.c, clip_value=self.clip_value) + + grads = tape.gradient(loss_value, self.embedding.trainable_weights) + optimizer.apply_gradients(zip(grads, 
+ + if step % 100 == 0: + log.info("Training loss (for one batch) at step %d: %.4f" + % (step, float(loss_value))) diff --git a/hyperlib/nn/layers/graph.py b/hyperlib/nn/layers/graph.py new file mode 100644 index 0000000..e30d3f1 --- /dev/null +++ b/hyperlib/nn/layers/graph.py @@ -0,0 +1,99 @@ +import tensorflow as tf +from tensorflow import keras + +from .linear import LinearHyperbolic, ActivationHyperbolic +from hyperlib.manifold.lorentz import Lorentz +from hyperlib.manifold.poincare import Poincare + + +class HyperbolicAggregation(keras.layers.Layer): + + def __init__(self, manifold, c): + super().__init__() + self.manifold = manifold + self.c = c + + def call(self, inputs): + # aggregate tangent-space features over the graph, then map back onto the manifold + x_tangent, adj = inputs + support_t = tf.sparse.sparse_dense_matmul(adj, x_tangent) + #support_t = tf.linalg.matmul(adj, x_tangent) + output = self.manifold.proj(self.manifold.expmap0(support_t, c=self.c), c=self.c) + return output + + +class HGCLayer(keras.layers.Layer): + def __init__(self, manifold, input_size, c, activation): + super().__init__() + + self.manifold = manifold + # accept a plain float or an existing tf.Variable; re-wrapping a Variable in tf.Variable([c]) would add a spurious leading dimension to the curvature + self.c = c if isinstance(c, tf.Variable) else tf.Variable([c], trainable=False) + self.linear_layer = LinearHyperbolic(input_size, self.manifold, self.c, activation=None) + #self.linear_layer = LinearHyperbolic(1433, self.manifold, 1.0, activation=None) + self.aggregation_layer = HyperbolicAggregation(self.manifold, self.c) + self.activation_layer = ActivationHyperbolic(self.manifold, self.c, self.c, activation) + + def call(self, inputs): + # Step 1 (hyperbolic feature transform) + x, adj = inputs + # x = self.manifold.logmap0(x, c=self.c) + + # Step 2 (neighborhood aggregation) + print('HGCLayer x shape', x.shape) + x = self.linear_layer(x) + x = self.aggregation_layer((x, adj)) + + # Step 3 (non-linear activation with different curvatures) + x = self.activation_layer(x) + + # return the adjacency alongside the features so that layers can be chained + return x, adj + + +class HGCNLP(keras.Model): + + def __init__(self, input_size, dropout=0.4): + super().__init__() + + self.input_size = input_size + + self.manifold = Lorentz() + self.c_map = tf.Variable([0.4], trainable=False) + self.c0 = tf.Variable([0.4], trainable=False) + self.c1 = tf.Variable([0.4], trainable=False) + self.c2 = tf.Variable([0.4], trainable=False) + # Fermi-Dirac decoder parameters used by decode(); these defaults follow the HGCN reference implementation + self.r = 2.0 + self.t = 1.0 + + self.conv0 = HGCLayer(self.manifold, self.input_size, self.c0, activation="relu") + self.conv1 = HGCLayer(self.manifold, self.input_size, self.c0, activation="relu") + self.conv2 = HGCLayer(self.manifold, self.input_size, self.c0, activation="relu") + + def call(self, inputs): + x, adj = inputs + # Map euclidean features to Hyperbolic space + x_tan = self.manifold.proj_tan0(x, self.c1) + x_hyp = self.manifold.expmap0(x_tan, c=self.c1) + x_hyp = self.manifold.proj(x_hyp, c=self.c1) + #x = self.manifold.expmap0(x, c=self.c_map) + # Stack multiple hyperbolic graph convolution layers + x, adj = self.conv0((x_hyp, adj)) + #x, adj = self.conv1((x, adj)) + #x, adj = self.conv2((x, adj)) + + # TODO - add link prediction/node classification code as described + # in the notes below + # Notes + # Note 1: Hyperbolic embeddings at the last layer can then be used to predict node attributes or links + # Note 2: For link prediction we use the Fermi-Dirac decoder, a generalization of sigmoid, + # to compute probability scores for edges.
We then train HGCN by minimizing the + # cross-entropy loss using negative sampling + # Note 3: For node classification, map the output of the last HGCN layer to the tangent space of the origin with the + # logarithmic map and then perform Euclidean multinomial logistic regression. Note that another possibility + # is to directly classify points on the hyperboloid manifold using the hyperbolic multinomial logistic loss. + # This method performs similarly to Euclidean classification. Finally, we also add a link prediction + # regularization objective in node classification tasks, to encourage embeddings at the last layer to + # preserve the graph structure + return x + + def decode(self, emb_in, emb_out): + # self.c2 stands in for the curvature of the final layer; the original referenced an undefined self.c + sqdist = self.manifold.sqdist(emb_in, emb_out, self.c2) + # Fermi-Dirac decoder to compute edge probabilities + probs = 1. / (tf.exp((sqdist - self.r) / self.t) + 1.0) + return probs diff --git a/hyperlib/nn/layers/lin_hyp.py b/hyperlib/nn/layers/linear.py similarity index 63% rename from hyperlib/nn/layers/lin_hyp.py rename to hyperlib/nn/layers/linear.py index 37f7a49..7271512 100644 --- a/hyperlib/nn/layers/lin_hyp.py +++ b/hyperlib/nn/layers/linear.py @@ -7,13 +7,14 @@ class LinearHyperbolic(keras.layers.Layer): Implementation of a hyperbolic linear layer for a neural network, that inherits from the keras Layer class """ - def __init__(self, units, manifold, c, activation=None, use_bias=True): + def __init__(self, units, manifold, c, use_activation=False, activation=None, use_bias=False): super().__init__() self.units = units - self.c = tf.Variable([c], dtype="float64") + # wrap plain floats only; re-wrapping an existing tf.Variable would add an extra dimension to the curvature + self.c = c if isinstance(c, tf.Variable) else tf.Variable([c], dtype="float64") self.manifold = manifold self.activation = keras.activations.get(activation) self.use_bias = use_bias + self.use_activation = use_activation def build(self, batch_input_shape): w_init = tf.random_normal_initializer() @@ -40,14 +41,16 @@ def call(self, inputs): inputs = tf.cast(inputs, tf.float64) mv = self.manifold.mobius_matvec(self.kernel, inputs, self.c) res = self.manifold.proj(mv, c=self.c) - + print('LinearHyperbolic x shape', inputs.shape) if self.use_bias: hyp_bias = self.manifold.expmap0(self.bias, c=self.c) hyp_bias = self.manifold.proj(hyp_bias, c=self.c) res = self.manifold.mobius_add(res, hyp_bias, c=self.c) res = self.manifold.proj(res, c=self.c) - return self.activation(res) + if self.use_activation: + res = self.activation(res) + return res def get_config(self): base_config = super().get_config() @@ -58,3 +61,28 @@ def get_config(self): "manifold": self.manifold, "curvature": self.c } + +class ActivationHyperbolic(keras.layers.Layer): + def __init__(self, manifold, c_in, c_out, activation): + super().__init__() + self.activation = keras.activations.get(activation) + self.c_in = c_in + self.c_out = c_out + self.manifold = manifold + + def build(self, input_shape): + self.built = True + + def call(self, inputs): + # apply the activation in the tangent space at the origin, then map back onto the manifold + inputs_tan = self.activation(self.manifold.logmap0(inputs, c=self.c_in)) + inputs_tan = self.manifold.proj_tan0(inputs_tan, self.c_out) + out = self.manifold.expmap0(inputs_tan, c=self.c_out) + return self.manifold.proj(out, c=self.c_out) + + def get_config(self): + return { + "activation": keras.activations.serialize(self.activation), + "c_in": self.c_in, + "c_out": self.c_out, + "manifold": self.manifold.name, + } diff --git a/hyperlib/utils/data_utils.py b/hyperlib/utils/data_utils.py new file mode 100644 index 0000000..8f7b73c --- /dev/null +++ b/hyperlib/utils/data_utils.py @@ -0,0 +1,256 @@ +"""Data utils functions for pre-processing and data loading.""" +import os +import pickle as pkl +import sys +
+import networkx as nx +import numpy as np +import scipy.sparse as sp +import torch + + +def load_data(args, datapath): + if args.task == 'nc': + data = load_data_nc(args.dataset, args.use_feats, datapath, args.split_seed) + else: + data = load_data_lp(args.dataset, args.use_feats, datapath) + adj = data['adj_train'] + if args.task == 'lp': + adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false = mask_edges( + adj, args.val_prop, args.test_prop, args.split_seed + ) + data['adj_train'] = adj_train + data['train_edges'], data['train_edges_false'] = train_edges, train_edges_false + data['val_edges'], data['val_edges_false'] = val_edges, val_edges_false + data['test_edges'], data['test_edges_false'] = test_edges, test_edges_false + data['adj_train_norm'], data['features'] = process( + data['adj_train'], data['features'], args.normalize_adj, args.normalize_feats + ) + if args.dataset == 'airport': + data['features'] = augment(data['adj_train'], data['features']) + return data + + +# ############### FEATURES PROCESSING #################################### + + +def process(adj, features, normalize_adj, normalize_feats): + if sp.isspmatrix(features): + features = np.array(features.todense()) + if normalize_feats: + features = normalize(features) + features = torch.Tensor(features) + if normalize_adj: + adj = normalize(adj + sp.eye(adj.shape[0])) + adj = sparse_mx_to_torch_sparse_tensor(adj) + return adj, features + + +def normalize(mx): + """Row-normalize sparse matrix.""" + rowsum = np.array(mx.sum(1)) + r_inv = np.power(rowsum, -1).flatten() + r_inv[np.isinf(r_inv)] = 0. + r_mat_inv = sp.diags(r_inv) + mx = r_mat_inv.dot(mx) + return mx + + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo() + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) + values = torch.Tensor(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + # torch.sparse.FloatTensor is deprecated; construct the sparse COO tensor directly + return torch.sparse_coo_tensor(indices, values, shape) + + +def augment(adj, features, normalize_feats=True): + deg = np.squeeze(np.sum(adj, axis=0).astype(int)) + deg[deg > 5] = 5 + deg_onehot = torch.tensor(np.eye(6)[deg], dtype=torch.float).squeeze() + const_f = torch.ones(features.size(0), 1) + features = torch.cat((features, deg_onehot, const_f), dim=1) + return features + + +# ############### DATA SPLITS ##################################################### + + +def mask_edges(adj, val_prop, test_prop, seed): + np.random.seed(seed) + # get true-positive edges + x, y = sp.triu(adj).nonzero() + pos_edges = np.array(list(zip(x, y))) + np.random.shuffle(pos_edges) + # get true-negative edges + x, y = sp.triu(sp.csr_matrix(1.
- adj.toarray())).nonzero() + neg_edges = np.array(list(zip(x, y))) + np.random.shuffle(neg_edges) + + m_pos = len(pos_edges) + n_val = int(m_pos * val_prop) + n_test = int(m_pos * test_prop) + val_edges, test_edges, train_edges = pos_edges[:n_val], pos_edges[n_val:n_test + n_val], pos_edges[n_test + n_val:] + val_edges_false, test_edges_false = neg_edges[:n_val], neg_edges[n_val:n_test + n_val] + train_edges_false = np.concatenate([neg_edges, val_edges, test_edges], axis=0) + adj_train = sp.csr_matrix((np.ones(train_edges.shape[0]), (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape) + adj_train = adj_train + adj_train.T + return adj_train, torch.LongTensor(train_edges), torch.LongTensor(train_edges_false), torch.LongTensor(val_edges), \ + torch.LongTensor(val_edges_false), torch.LongTensor(test_edges), torch.LongTensor( + test_edges_false) + + +def split_data(labels, val_prop, test_prop, seed): + np.random.seed(seed) + nb_nodes = labels.shape[0] + all_idx = np.arange(nb_nodes) + pos_idx = labels.nonzero()[0] + neg_idx = (1. - labels).nonzero()[0] + np.random.shuffle(pos_idx) + np.random.shuffle(neg_idx) + pos_idx = pos_idx.tolist() + neg_idx = neg_idx.tolist() + nb_pos_neg = min(len(pos_idx), len(neg_idx)) + nb_val = round(val_prop * nb_pos_neg) + nb_test = round(test_prop * nb_pos_neg) + idx_val_pos, idx_test_pos, idx_train_pos = pos_idx[:nb_val], pos_idx[nb_val:nb_val + nb_test], pos_idx[ + nb_val + nb_test:] + idx_val_neg, idx_test_neg, idx_train_neg = neg_idx[:nb_val], neg_idx[nb_val:nb_val + nb_test], neg_idx[ + nb_val + nb_test:] + return idx_val_pos + idx_val_neg, idx_test_pos + idx_test_neg, idx_train_pos + idx_train_neg + + +def bin_feat(feat, bins): + digitized = np.digitize(feat, bins) + return digitized - digitized.min() + + +# ############### LINK PREDICTION DATA LOADERS #################################### + + +def load_data_lp(dataset, use_feats, data_path): + if dataset in ['cora', 'pubmed']: + adj, features = load_citation_data(dataset, use_feats, data_path)[:2] + elif dataset == 'disease_lp': + adj, features = load_synthetic_data(dataset, use_feats, data_path)[:2] + elif dataset == 'airport': + adj, features = load_data_airport(dataset, data_path, return_label=False) + else: + raise FileNotFoundError('Dataset {} is not supported.'.format(dataset)) + data = {'adj_train': adj, 'features': features} + return data + + +# ############### NODE CLASSIFICATION DATA LOADERS #################################### + + +def load_data_nc(dataset, use_feats, data_path, split_seed): + if dataset in ['cora', 'pubmed']: + adj, features, labels, idx_train, idx_val, idx_test = load_citation_data( + dataset, use_feats, data_path, split_seed + ) + else: + if dataset == 'disease_nc': + adj, features, labels = load_synthetic_data(dataset, use_feats, data_path) + val_prop, test_prop = 0.10, 0.60 + elif dataset == 'airport': + adj, features, labels = load_data_airport(dataset, data_path, return_label=True) + val_prop, test_prop = 0.15, 0.15 + else: + raise FileNotFoundError('Dataset {} is not supported.'.format(dataset)) + idx_val, idx_test, idx_train = split_data(labels, val_prop, test_prop, seed=split_seed) + + labels = torch.LongTensor(labels) + data = {'adj_train': adj, 'features': features, 'labels': labels, 'idx_train': idx_train, 'idx_val': idx_val, 'idx_test': idx_test} + return data + + +# ############### DATASETS #################################### + + +def load_citation_data(dataset_str, use_feats, data_path, split_seed=None): + names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 
'graph'] + objects = [] + for i in range(len(names)): + with open(os.path.join(data_path, "ind.{}.{}".format(dataset_str, names[i])), 'rb') as f: + if sys.version_info > (3, 0): + objects.append(pkl.load(f, encoding='latin1')) + else: + objects.append(pkl.load(f)) + + x, y, tx, ty, allx, ally, graph = tuple(objects) + test_idx_reorder = parse_index_file(os.path.join(data_path, "ind.{}.test.index".format(dataset_str))) + test_idx_range = np.sort(test_idx_reorder) + + features = sp.vstack((allx, tx)).tolil() + features[test_idx_reorder, :] = features[test_idx_range, :] + + labels = np.vstack((ally, ty)) + labels[test_idx_reorder, :] = labels[test_idx_range, :] + labels = np.argmax(labels, 1) + + idx_test = test_idx_range.tolist() + idx_train = list(range(len(y))) + idx_val = range(len(y), len(y) + 500) + + adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph)) + if not use_feats: + features = sp.eye(adj.shape[0]) + return adj, features, labels, idx_train, idx_val, idx_test + + +def parse_index_file(filename): + index = [] + for line in open(filename): + index.append(int(line.strip())) + return index + + +def load_synthetic_data(dataset_str, use_feats, data_path): + object_to_idx = {} + idx_counter = 0 + edges = [] + with open(os.path.join(data_path, "{}.edges.csv".format(dataset_str)), 'r') as f: + all_edges = f.readlines() + for line in all_edges: + n1, n2 = line.rstrip().split(',') + if n1 in object_to_idx: + i = object_to_idx[n1] + else: + i = idx_counter + object_to_idx[n1] = i + idx_counter += 1 + if n2 in object_to_idx: + j = object_to_idx[n2] + else: + j = idx_counter + object_to_idx[n2] = j + idx_counter += 1 + edges.append((i, j)) + adj = np.zeros((len(object_to_idx), len(object_to_idx))) + for i, j in edges: + adj[i, j] = 1. # comment this line for directed adjacency matrix + adj[j, i] = 1. 
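+    # use the precomputed feature file when available; otherwise fall back to one-hot (identity) features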
+ if use_feats: + features = sp.load_npz(os.path.join(data_path, "{}.feats.npz".format(dataset_str))) + else: + features = sp.eye(adj.shape[0]) + labels = np.load(os.path.join(data_path, "{}.labels.npy".format(dataset_str))) + return sp.csr_matrix(adj), features, labels + + +def load_data_airport(dataset_str, data_path, return_label=False): + graph = pkl.load(open(os.path.join(data_path, dataset_str + '.p'), 'rb')) + adj = nx.adjacency_matrix(graph) + features = np.array([graph.node[u]['feat'] for u in graph.nodes()]) + if return_label: + label_idx = 4 + labels = features[:, label_idx] + features = features[:, :label_idx] + labels = bin_feat(labels, bins=[7.0/7, 8.0/7, 9.0/7]) + return sp.csr_matrix(adj), features, labels + else: + return sp.csr_matrix(adj), features + diff --git a/resources/data/hgcn/cora/ind.cora.allx b/resources/data/hgcn/cora/ind.cora.allx new file mode 100644 index 0000000..44d53b1 Binary files /dev/null and b/resources/data/hgcn/cora/ind.cora.allx differ diff --git a/resources/data/hgcn/cora/ind.cora.ally b/resources/data/hgcn/cora/ind.cora.ally new file mode 100644 index 0000000..04fbd0b Binary files /dev/null and b/resources/data/hgcn/cora/ind.cora.ally differ diff --git a/resources/data/hgcn/cora/ind.cora.graph b/resources/data/hgcn/cora/ind.cora.graph new file mode 100644 index 0000000..4d3bf85 Binary files /dev/null and b/resources/data/hgcn/cora/ind.cora.graph differ diff --git a/resources/data/hgcn/cora/ind.cora.test.index b/resources/data/hgcn/cora/ind.cora.test.index new file mode 100644 index 0000000..ded8092 --- /dev/null +++ b/resources/data/hgcn/cora/ind.cora.test.index @@ -0,0 +1,1000 @@ +2692 +2532 +2050 +1715 +2362 +2609 +2622 +1975 +2081 +1767 +2263 +1725 +2588 +2259 +2357 +1998 +2574 +2179 +2291 +2382 +1812 +1751 +2422 +1937 +2631 +2510 +2378 +2589 +2345 +1943 +1850 +2298 +1825 +2035 +2507 +2313 +1906 +1797 +2023 +2159 +2495 +1886 +2122 +2369 +2461 +1925 +2565 +1858 +2234 +2000 +1846 +2318 +1723 +2559 +2258 +1763 +1991 +1922 +2003 +2662 +2250 +2064 +2529 +1888 +2499 +2454 +2320 +2287 +2203 +2018 +2002 +2632 +2554 +2314 +2537 +1760 +2088 +2086 +2218 +2605 +1953 +2403 +1920 +2015 +2335 +2535 +1837 +2009 +1905 +2636 +1942 +2193 +2576 +2373 +1873 +2463 +2509 +1954 +2656 +2455 +2494 +2295 +2114 +2561 +2176 +2275 +2635 +2442 +2704 +2127 +2085 +2214 +2487 +1739 +2543 +1783 +2485 +2262 +2472 +2326 +1738 +2170 +2100 +2384 +2152 +2647 +2693 +2376 +1775 +1726 +2476 +2195 +1773 +1793 +2194 +2581 +1854 +2524 +1945 +1781 +1987 +2599 +1744 +2225 +2300 +1928 +2042 +2202 +1958 +1816 +1916 +2679 +2190 +1733 +2034 +2643 +2177 +1883 +1917 +1996 +2491 +2268 +2231 +2471 +1919 +1909 +2012 +2522 +1865 +2466 +2469 +2087 +2584 +2563 +1924 +2143 +1736 +1966 +2533 +2490 +2630 +1973 +2568 +1978 +2664 +2633 +2312 +2178 +1754 +2307 +2480 +1960 +1742 +1962 +2160 +2070 +2553 +2433 +1768 +2659 +2379 +2271 +1776 +2153 +1877 +2027 +2028 +2155 +2196 +2483 +2026 +2158 +2407 +1821 +2131 +2676 +2277 +2489 +2424 +1963 +1808 +1859 +2597 +2548 +2368 +1817 +2405 +2413 +2603 +2350 +2118 +2329 +1969 +2577 +2475 +2467 +2425 +1769 +2092 +2044 +2586 +2608 +1983 +2109 +2649 +1964 +2144 +1902 +2411 +2508 +2360 +1721 +2005 +2014 +2308 +2646 +1949 +1830 +2212 +2596 +1832 +1735 +1866 +2695 +1941 +2546 +2498 +2686 +2665 +1784 +2613 +1970 +2021 +2211 +2516 +2185 +2479 +2699 +2150 +1990 +2063 +2075 +1979 +2094 +1787 +2571 +2690 +1926 +2341 +2566 +1957 +1709 +1955 +2570 +2387 +1811 +2025 +2447 +2696 +2052 +2366 +1857 +2273 +2245 +2672 +2133 +2421 +1929 +2125 +2319 +2641 +2167 +2418 +1765 +1761 +1828 +2188 
+1972 +1997 +2419 +2289 +2296 +2587 +2051 +2440 +2053 +2191 +1923 +2164 +1861 +2339 +2333 +2523 +2670 +2121 +1921 +1724 +2253 +2374 +1940 +2545 +2301 +2244 +2156 +1849 +2551 +2011 +2279 +2572 +1757 +2400 +2569 +2072 +2526 +2173 +2069 +2036 +1819 +1734 +1880 +2137 +2408 +2226 +2604 +1771 +2698 +2187 +2060 +1756 +2201 +2066 +2439 +1844 +1772 +2383 +2398 +1708 +1992 +1959 +1794 +2426 +2702 +2444 +1944 +1829 +2660 +2497 +2607 +2343 +1730 +2624 +1790 +1935 +1967 +2401 +2255 +2355 +2348 +1931 +2183 +2161 +2701 +1948 +2501 +2192 +2404 +2209 +2331 +1810 +2363 +2334 +1887 +2393 +2557 +1719 +1732 +1986 +2037 +2056 +1867 +2126 +1932 +2117 +1807 +1801 +1743 +2041 +1843 +2388 +2221 +1833 +2677 +1778 +2661 +2306 +2394 +2106 +2430 +2371 +2606 +2353 +2269 +2317 +2645 +2372 +2550 +2043 +1968 +2165 +2310 +1985 +2446 +1982 +2377 +2207 +1818 +1913 +1766 +1722 +1894 +2020 +1881 +2621 +2409 +2261 +2458 +2096 +1712 +2594 +2293 +2048 +2359 +1839 +2392 +2254 +1911 +2101 +2367 +1889 +1753 +2555 +2246 +2264 +2010 +2336 +2651 +2017 +2140 +1842 +2019 +1890 +2525 +2134 +2492 +2652 +2040 +2145 +2575 +2166 +1999 +2434 +1711 +2276 +2450 +2389 +2669 +2595 +1814 +2039 +2502 +1896 +2168 +2344 +2637 +2031 +1977 +2380 +1936 +2047 +2460 +2102 +1745 +2650 +2046 +2514 +1980 +2352 +2113 +1713 +2058 +2558 +1718 +1864 +1876 +2338 +1879 +1891 +2186 +2451 +2181 +2638 +2644 +2103 +2591 +2266 +2468 +1869 +2582 +2674 +2361 +2462 +1748 +2215 +2615 +2236 +2248 +2493 +2342 +2449 +2274 +1824 +1852 +1870 +2441 +2356 +1835 +2694 +2602 +2685 +1893 +2544 +2536 +1994 +1853 +1838 +1786 +1930 +2539 +1892 +2265 +2618 +2486 +2583 +2061 +1796 +1806 +2084 +1933 +2095 +2136 +2078 +1884 +2438 +2286 +2138 +1750 +2184 +1799 +2278 +2410 +2642 +2435 +1956 +2399 +1774 +2129 +1898 +1823 +1938 +2299 +1862 +2420 +2673 +1984 +2204 +1717 +2074 +2213 +2436 +2297 +2592 +2667 +2703 +2511 +1779 +1782 +2625 +2365 +2315 +2381 +1788 +1714 +2302 +1927 +2325 +2506 +2169 +2328 +2629 +2128 +2655 +2282 +2073 +2395 +2247 +2521 +2260 +1868 +1988 +2324 +2705 +2541 +1731 +2681 +2707 +2465 +1785 +2149 +2045 +2505 +2611 +2217 +2180 +1904 +2453 +2484 +1871 +2309 +2349 +2482 +2004 +1965 +2406 +2162 +1805 +2654 +2007 +1947 +1981 +2112 +2141 +1720 +1758 +2080 +2330 +2030 +2432 +2089 +2547 +1820 +1815 +2675 +1840 +2658 +2370 +2251 +1908 +2029 +2068 +2513 +2549 +2267 +2580 +2327 +2351 +2111 +2022 +2321 +2614 +2252 +2104 +1822 +2552 +2243 +1798 +2396 +2663 +2564 +2148 +2562 +2684 +2001 +2151 +2706 +2240 +2474 +2303 +2634 +2680 +2055 +2090 +2503 +2347 +2402 +2238 +1950 +2054 +2016 +1872 +2233 +1710 +2032 +2540 +2628 +1795 +2616 +1903 +2531 +2567 +1946 +1897 +2222 +2227 +2627 +1856 +2464 +2241 +2481 +2130 +2311 +2083 +2223 +2284 +2235 +2097 +1752 +2515 +2527 +2385 +2189 +2283 +2182 +2079 +2375 +2174 +2437 +1993 +2517 +2443 +2224 +2648 +2171 +2290 +2542 +2038 +1855 +1831 +1759 +1848 +2445 +1827 +2429 +2205 +2598 +2657 +1728 +2065 +1918 +2427 +2573 +2620 +2292 +1777 +2008 +1875 +2288 +2256 +2033 +2470 +2585 +2610 +2082 +2230 +1915 +1847 +2337 +2512 +2386 +2006 +2653 +2346 +1951 +2110 +2639 +2520 +1939 +2683 +2139 +2220 +1910 +2237 +1900 +1836 +2197 +1716 +1860 +2077 +2519 +2538 +2323 +1914 +1971 +1845 +2132 +1802 +1907 +2640 +2496 +2281 +2198 +2416 +2285 +1755 +2431 +2071 +2249 +2123 +1727 +2459 +2304 +2199 +1791 +1809 +1780 +2210 +2417 +1874 +1878 +2116 +1961 +1863 +2579 +2477 +2228 +2332 +2578 +2457 +2024 +1934 +2316 +1841 +1764 +1737 +2322 +2239 +2294 +1729 +2488 +1974 +2473 +2098 +2612 +1834 +2340 +2423 +2175 +2280 +2617 +2208 +2560 +1741 +2600 +2059 +1747 +2242 +2700 +2232 +2057 +2147 
+2682 +1792 +1826 +2120 +1895 +2364 +2163 +1851 +2391 +2414 +2452 +1803 +1989 +2623 +2200 +2528 +2415 +1804 +2146 +2619 +2687 +1762 +2172 +2270 +2678 +2593 +2448 +1882 +2257 +2500 +1899 +2478 +2412 +2107 +1746 +2428 +2115 +1800 +1901 +2397 +2530 +1912 +2108 +2206 +2091 +1740 +2219 +1976 +2099 +2142 +2671 +2668 +2216 +2272 +2229 +2666 +2456 +2534 +2697 +2688 +2062 +2691 +2689 +2154 +2590 +2626 +2390 +1813 +2067 +1952 +2518 +2358 +1789 +2076 +2049 +2119 +2013 +2124 +2556 +2105 +2093 +1885 +2305 +2354 +2135 +2601 +1770 +1995 +2504 +1749 +2157 diff --git a/resources/data/hgcn/cora/ind.cora.tx b/resources/data/hgcn/cora/ind.cora.tx new file mode 100644 index 0000000..6e856d7 Binary files /dev/null and b/resources/data/hgcn/cora/ind.cora.tx differ diff --git a/resources/data/hgcn/cora/ind.cora.ty b/resources/data/hgcn/cora/ind.cora.ty new file mode 100644 index 0000000..da1734a Binary files /dev/null and b/resources/data/hgcn/cora/ind.cora.ty differ diff --git a/resources/data/hgcn/cora/ind.cora.x b/resources/data/hgcn/cora/ind.cora.x new file mode 100644 index 0000000..c4a91d0 Binary files /dev/null and b/resources/data/hgcn/cora/ind.cora.x differ diff --git a/resources/data/hgcn/cora/ind.cora.y b/resources/data/hgcn/cora/ind.cora.y new file mode 100644 index 0000000..58e30ef Binary files /dev/null and b/resources/data/hgcn/cora/ind.cora.y differ diff --git a/tests/test_poincare.py b/tests/test_poincare.py index 0e9322c..e0721b4 100644 --- a/tests/test_poincare.py +++ b/tests/test_poincare.py @@ -1,7 +1,7 @@ import tensorflow as tf from hyperlib.manifold import poincare from hyperlib.utils import functional as F -from hyperlib.nn.layers import lin_hyp, dense_attention +from hyperlib.nn.layers import linear, dense_attention from hyperlib.nn.optimizers import rsgd import pytest @@ -54,7 +54,7 @@ def test_poincare_functions(self): assert manifold.min_norm == 1e-15 def test_create_layer(self, units=32): - hyp_layer = lin_hyp.LinearHyperbolic( + hyp_layer = linear.LinearHyperbolic( units, self.poincare_manifold, 1.0 ) assert hyp_layer.units == units @@ -73,14 +73,14 @@ def test_attention_layer(self): def test_layer_training(self, units=32): x_input = tf.zeros([units, 1]) - hyp_layer = lin_hyp.LinearHyperbolic( + hyp_layer = linear.LinearHyperbolic( units, self.poincare_manifold, 1.0 ) output = hyp_layer(x_input) def test_layer_training_with_bias(self, units=32): x_input = tf.zeros([units, 1]) - hyp_layer = lin_hyp.LinearHyperbolic( + hyp_layer = linear.LinearHyperbolic( units, self.poincare_manifold, 1.0, use_bias=True ) output = hyp_layer(x_input)
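+ + def test_sqdist_symmetry(self): + # A minimal sketch of a check for the sqdist method added to the Poincare manifold in this change. + # It assumes atanh is available inside poincare.py; the squared hyperbolic distance should be + # symmetric for points inside the unit ball. + x = tf.constant([[0.1, 0.2], [0.0, 0.3]], dtype=tf.float64) + y = tf.constant([[0.2, 0.1], [0.1, 0.0]], dtype=tf.float64) + d_xy = self.poincare_manifold.sqdist(x, y, 1.0) + d_yx = self.poincare_manifold.sqdist(y, x, 1.0) + assert tf.reduce_max(tf.abs(d_xy - d_yx)) < 1e-8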