diff --git a/notebooks/README.md b/notebooks/README.md
new file mode 100644
index 0000000..09d7c2f
--- /dev/null
+++ b/notebooks/README.md
@@ -0,0 +1,30 @@
+# Running the record api tooling on notebooks
+
+First of all, clone the record api repo:
+
+```
+git clone https://github.com/data-apis/python-record-api.git
+```
+
+Then create a new conda env in which to install the notebook's dependencies and the record api tooling:
+
+```
+conda create -n record-api python=3.8
+conda activate record-api
+cd python-record-api
+pip install record_api
+```
+
+We have an example notebook under `/notebooks/data`; it needs to be converted
+to a Python file so that the record api tooling can work on it.
+
+```
+cd notebooks
+python notebook_to_python.py data
+```
+
+A new folder named `scripts` under the `notebooks` directory will be created with
+all the files converted into python files.
+
+Then run the script `run_record_api.sh` to generate the inferred API calls for each notebook.
+The output is written to the `infer_apis` directory as one JSON file per notebook.
diff --git a/notebooks/data/example.ipynb b/notebooks/data/example.ipynb
new file mode 100644
index 0000000..c29d8df
--- /dev/null
+++ b/notebooks/data/example.ipynb
@@ -0,0 +1,538 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s = pd.Series([1, 3, 5, np.nan, 6, 8])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dates = pd.date_range(\"20130101\", periods=6)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list(\"ABCD\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df2 = pd.DataFrame(\n",
+    "        {\n",
+    "            \"A\": 1.0,\n",
+    "            \"B\": pd.Timestamp(\"20130102\"),\n",
+    "            \"C\": pd.Series(1, index=list(range(4)), dtype=\"float32\"),\n",
+    "            \"D\": np.array([3] * 4, dtype=\"int32\"),\n",
+    "            \"E\": pd.Categorical([\"test\", \"train\", \"test\", \"train\"]),\n",
+    "            \"F\": \"foo\",\n",
+    "        }\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>A</th>\n",
+       "      <th>B</th>\n",
+       "      <th>C</th>\n",
+       "      <th>D</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2013-01-01</th>\n",
+       "      <td>-0.328603</td>\n",
+       "      <td>-0.366380</td>\n",
+       "      <td>-0.619592</td>\n",
+       "      <td>-2.822423</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013-01-02</th>\n",
+       "      <td>-0.427067</td>\n",
+       "      <td>0.317282</td>\n",
+       "      <td>-0.938094</td>\n",
+       "      <td>-0.148115</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013-01-03</th>\n",
+       "      <td>-3.012013</td>\n",
+       "      <td>-0.535066</td>\n",
+       "      <td>0.790945</td>\n",
+       "      <td>0.209057</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013-01-04</th>\n",
+       "      <td>0.762696</td>\n",
+       "      <td>0.243109</td>\n",
+       "      <td>1.879514</td>\n",
+       "      <td>1.108437</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013-01-05</th>\n",
+       "      <td>-0.402783</td>\n",
+       "      <td>-0.876177</td>\n",
+       "      <td>-0.093985</td>\n",
+       "      <td>0.367029</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                   A         B         C         D\n",
+       "2013-01-01 -0.328603 -0.366380 -0.619592 -2.822423\n",
+       "2013-01-02 -0.427067  0.317282 -0.938094 -0.148115\n",
+       "2013-01-03 -3.012013 -0.535066  0.790945  0.209057\n",
+       "2013-01-04  0.762696  0.243109  1.879514  1.108437\n",
+       "2013-01-05 -0.402783 -0.876177 -0.093985  0.367029"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>A</th>\n",
+       "      <th>B</th>\n",
+       "      <th>C</th>\n",
+       "      <th>D</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2013-01-04</th>\n",
+       "      <td>0.762696</td>\n",
+       "      <td>0.243109</td>\n",
+       "      <td>1.879514</td>\n",
+       "      <td>1.108437</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013-01-05</th>\n",
+       "      <td>-0.402783</td>\n",
+       "      <td>-0.876177</td>\n",
+       "      <td>-0.093985</td>\n",
+       "      <td>0.367029</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013-01-06</th>\n",
+       "      <td>-1.049293</td>\n",
+       "      <td>0.837073</td>\n",
+       "      <td>1.210975</td>\n",
+       "      <td>-1.206224</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                   A         B         C         D\n",
+       "2013-01-04  0.762696  0.243109  1.879514  1.108437\n",
+       "2013-01-05 -0.402783 -0.876177 -0.093985  0.367029\n",
+       "2013-01-06 -1.049293  0.837073  1.210975 -1.206224"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.tail(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n",
+       "               '2013-01-05', '2013-01-06'],\n",
+       "              dtype='datetime64[ns]', freq='D')"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.index"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['A', 'B', 'C', 'D'], dtype='object')"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[-0.32860292, -0.36637983, -0.61959229, -2.82242287],\n",
+       "       [-0.42706726,  0.3172823 , -0.93809362, -0.14811456],\n",
+       "       [-3.01201345, -0.53506589,  0.79094519,  0.20905686],\n",
+       "       [ 0.76269556,  0.24310942,  1.87951383,  1.10843652],\n",
+       "       [-0.40278323, -0.87617667, -0.09398487,  0.3670287 ],\n",
+       "       [-1.04929264,  0.8370728 ,  1.21097545, -1.20622421]])"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.to_numpy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>A</th>\n",
+       "      <th>B</th>\n",
+       "      <th>C</th>\n",
+       "      <th>D</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "      <td>6.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>-0.742844</td>\n",
+       "      <td>-0.063360</td>\n",
+       "      <td>0.371627</td>\n",
+       "      <td>-0.415373</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>1.256713</td>\n",
+       "      <td>0.636332</td>\n",
+       "      <td>1.101702</td>\n",
+       "      <td>1.401058</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>-3.012013</td>\n",
+       "      <td>-0.876177</td>\n",
+       "      <td>-0.938094</td>\n",
+       "      <td>-2.822423</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>-0.893736</td>\n",
+       "      <td>-0.492894</td>\n",
+       "      <td>-0.488190</td>\n",
+       "      <td>-0.941697</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>-0.414925</td>\n",
+       "      <td>-0.061635</td>\n",
+       "      <td>0.348480</td>\n",
+       "      <td>0.030471</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>-0.347148</td>\n",
+       "      <td>0.298739</td>\n",
+       "      <td>1.105968</td>\n",
+       "      <td>0.327536</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>0.762696</td>\n",
+       "      <td>0.837073</td>\n",
+       "      <td>1.879514</td>\n",
+       "      <td>1.108437</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "              A         B         C         D\n",
+       "count  6.000000  6.000000  6.000000  6.000000\n",
+       "mean  -0.742844 -0.063360  0.371627 -0.415373\n",
+       "std    1.256713  0.636332  1.101702  1.401058\n",
+       "min   -3.012013 -0.876177 -0.938094 -2.822423\n",
+       "25%   -0.893736 -0.492894 -0.488190 -0.941697\n",
+       "50%   -0.414925 -0.061635  0.348480  0.030471\n",
+       "75%   -0.347148  0.298739  1.105968  0.327536\n",
+       "max    0.762696  0.837073  1.879514  1.108437"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>D</th>\n",
+       "      <th>C</th>\n",
+       "      <th>B</th>\n",
+       "      <th>A</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2013-01-01</th>\n",
+       "      <td>-2.822423</td>\n",
+       "      <td>-0.619592</td>\n",
+       "      <td>-0.366380</td>\n",
+       "      <td>-0.328603</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013-01-02</th>\n",
+       "      <td>-0.148115</td>\n",
+       "      <td>-0.938094</td>\n",
+       "      <td>0.317282</td>\n",
+       "      <td>-0.427067</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013-01-03</th>\n",
+       "      <td>0.209057</td>\n",
+       "      <td>0.790945</td>\n",
+       "      <td>-0.535066</td>\n",
+       "      <td>-3.012013</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013-01-04</th>\n",
+       "      <td>1.108437</td>\n",
+       "      <td>1.879514</td>\n",
+       "      <td>0.243109</td>\n",
+       "      <td>0.762696</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013-01-05</th>\n",
+       "      <td>0.367029</td>\n",
+       "      <td>-0.093985</td>\n",
+       "      <td>-0.876177</td>\n",
+       "      <td>-0.402783</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2013-01-06</th>\n",
+       "      <td>-1.206224</td>\n",
+       "      <td>1.210975</td>\n",
+       "      <td>0.837073</td>\n",
+       "      <td>-1.049293</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                   D         C         B         A\n",
+       "2013-01-01 -2.822423 -0.619592 -0.366380 -0.328603\n",
+       "2013-01-02 -0.148115 -0.938094  0.317282 -0.427067\n",
+       "2013-01-03  0.209057  0.790945 -0.535066 -3.012013\n",
+       "2013-01-04  1.108437  1.879514  0.243109  0.762696\n",
+       "2013-01-05  0.367029 -0.093985 -0.876177 -0.402783\n",
+       "2013-01-06 -1.206224  1.210975  0.837073 -1.049293"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.sort_index(axis=1, ascending=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/grouped/example.json b/notebooks/grouped/example.json
new file mode 100644
index 0000000..073c39b
--- /dev/null
+++ b/notebooks/grouped/example.json
@@ -0,0 +1,18 @@
+{"bound_params":{"pos_or_kw":[["data",[{"t":"int"},{"t":"int"},{"t":"int"},{"t":"float"},{"t":"int"},{"t":"int"}]]]},"function":{"t":"type","v":{"module":"pandas.core.series","name":"Series"}},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"date_range"]},"n":1}
+{"bound_params":{"pos_or_kw":[["start","20130101"],["periods",{"t":"int"}]]},"function":{"t":"function","v":{"module":"pandas.core.indexes.datetimes","name":"date_range"}},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"DataFrame"]},"n":1}
+{"bound_params":{"pos_or_kw":[["data",{"t":{"module":"numpy","name":"ndarray"},"v":{"dtype":"float64"}}],["index",{"t":{"module":"pandas.core.indexes.datetimes","name":"DatetimeIndex"}}],["columns",["A","B","C","D"]]]},"function":{"t":"type","v":{"module":"pandas.core.frame","name":"DataFrame"}},"n":1}
+{"bound_params":{"pos_or_kw":[["ts_input","20130102"]]},"function":{"t":"type","v":{"module":"pandas._libs.tslibs.timestamps","name":"Timestamp"}},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"Series"]},"n":1}
+{"bound_params":{"pos_or_kw":[["data",{"t":"int"}],["index",[{"t":"int"},{"t":"int"},{"t":"int"},{"t":"int"}]],["dtype","float32"]]},"function":{"t":"type","v":{"module":"pandas.core.series","name":"Series"}},"n":1}
+{"bound_params":{"pos_or_kw":[["values",["test","train","test","train"]]]},"function":{"t":"type","v":{"module":"pandas.core.arrays.categorical","name":"Categorical"}},"n":1}
+{"bound_params":{"pos_or_kw":[["data",{"t":"dict","v":[["A",{"t":"float"}],["B",{"t":{"module":"pandas._libs.tslibs.timestamps","name":"Timestamp"}}],["C",{"t":{"module":"pandas.core.series","name":"Series"}}],["D",{"t":{"module":"numpy","name":"ndarray"},"v":{"dtype":"int32"}}],["E",{"t":{"module":"pandas.core.arrays.categorical","name":"Categorical"}}],["F","foo"]]}]]},"function":{"t":"type","v":{"module":"pandas.core.frame","name":"DataFrame"}},"n":1}
+{"bound_params":{},"function":{"t":"method","v":{"name":"head","self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}}}},"n":1}
+{"bound_params":{"pos_or_kw":[["n",{"t":"int"}]]},"function":{"t":"method","v":{"name":"tail","self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}}}},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"index"]},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"columns"]},"n":1}
+{"bound_params":{},"function":{"t":"method","v":{"name":"to_numpy","self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}}}},"n":1}
+{"bound_params":{},"function":{"t":"method","v":{"name":"describe","self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}}}},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"sort_index"]},"n":1}
+{"bound_params":{"pos_or_kw":[["axis",{"t":"int"}],["ascending",{"t":"bool"}]]},"function":{"t":"method","v":{"name":"sort_index","self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}}}},"n":1}
diff --git a/notebooks/infer_apis/example.json b/notebooks/infer_apis/example.json
new file mode 100644
index 0000000..9eef114
--- /dev/null
+++ b/notebooks/infer_apis/example.json
@@ -0,0 +1,443 @@
+{
+  "modules": {
+    "pandas.core.series": {
+      "classes": {
+        "Series": {
+          "constructor_overloads": [
+            {
+              "pos_or_kw_required": {
+                "data": {
+                  "type": "list",
+                  "item": {
+                    "type": "union",
+                    "options": [
+                      {
+                        "type": {
+                          "name": "int"
+                        }
+                      },
+                      {
+                        "type": {
+                          "name": "float"
+                        }
+                      }
+                    ]
+                  }
+                }
+              },
+              "metadata": {
+                "usage.hola": 1
+              }
+            },
+            {
+              "pos_or_kw_required": {
+                "data": {
+                  "type": {
+                    "name": "int"
+                  }
+                },
+                "index": {
+                  "type": "list",
+                  "item": {
+                    "type": {
+                      "name": "int"
+                    }
+                  }
+                },
+                "dtype": {
+                  "type": "str",
+                  "options": [
+                    "float32"
+                  ]
+                }
+              },
+              "metadata": {
+                "usage.hola": 1
+              }
+            }
+          ],
+          "constructor": {
+            "pos_or_kw_required": {
+              "data": {
+                "type": "union",
+                "options": [
+                  {
+                    "type": {
+                      "name": "int"
+                    }
+                  },
+                  {
+                    "type": "list",
+                    "item": {
+                      "type": "union",
+                      "options": [
+                        {
+                          "type": {
+                            "name": "float"
+                          }
+                        },
+                        {
+                          "type": {
+                            "name": "int"
+                          }
+                        }
+                      ]
+                    }
+                  }
+                ]
+              }
+            },
+            "pos_or_kw_optional": {
+              "index": {
+                "type": "list",
+                "item": {
+                  "type": {
+                    "name": "int"
+                  }
+                }
+              },
+              "dtype": {
+                "type": "str",
+                "options": [
+                  "float32"
+                ]
+              }
+            },
+            "pos_or_kw_optional_ordering": [
+              [
+                "index",
+                "dtype"
+              ]
+            ],
+            "metadata": {
+              "usage.hola": 2
+            }
+          }
+        }
+      }
+    },
+    "pandas": {
+      "properties": {
+        "date_range": [
+          {
+            "usage.hola": 1
+          },
+          {
+            "type": "bottom"
+          }
+        ],
+        "DataFrame": [
+          {
+            "usage.hola": 1
+          },
+          {
+            "type": "bottom"
+          }
+        ],
+        "Series": [
+          {
+            "usage.hola": 1
+          },
+          {
+            "type": "bottom"
+          }
+        ]
+      }
+    },
+    "pandas.core.indexes.datetimes": {
+      "function_overloads": {
+        "date_range": [
+          {
+            "pos_or_kw_required": {
+              "start": {
+                "type": "str",
+                "options": [
+                  "20130101"
+                ]
+              },
+              "periods": {
+                "type": {
+                  "name": "int"
+                }
+              }
+            },
+            "metadata": {
+              "usage.hola": 1
+            }
+          }
+        ]
+      },
+      "functions": {
+        "date_range": {
+          "pos_or_kw_required": {
+            "start": {
+              "type": "str",
+              "options": [
+                "20130101"
+              ]
+            },
+            "periods": {
+              "type": {
+                "name": "int"
+              }
+            }
+          },
+          "metadata": {
+            "usage.hola": 1
+          }
+        }
+      }
+    },
+    "pandas.core.frame": {
+      "classes": {
+        "DataFrame": {
+          "constructor_overloads": [
+            {
+              "pos_or_kw_required": {
+                "data": {
+                  "type": {
+                    "module": "numpy",
+                    "name": "ndarray"
+                  }
+                },
+                "index": {
+                  "type": {
+                    "module": "pandas.core.indexes.datetimes",
+                    "name": "DatetimeIndex"
+                  }
+                },
+                "columns": {
+                  "type": "list",
+                  "item": {
+                    "type": "str",
+                    "options": [
+                      "D",
+                      "C",
+                      "B",
+                      "A"
+                    ]
+                  }
+                }
+              },
+              "metadata": {
+                "usage.hola": 1
+              }
+            },
+            {
+              "pos_or_kw_required": {
+                "data": {
+                  "type": "dict",
+                  "key": {
+                    "type": "str"
+                  },
+                  "value": {
+                    "type": "object"
+                  }
+                }
+              },
+              "metadata": {
+                "usage.hola": 1
+              }
+            }
+          ],
+          "method_overloads": {
+            "head": [
+              {
+                "metadata": {
+                  "usage.hola": 1
+                }
+              }
+            ],
+            "tail": [
+              {
+                "pos_or_kw_required": {
+                  "n": {
+                    "type": {
+                      "name": "int"
+                    }
+                  }
+                },
+                "metadata": {
+                  "usage.hola": 1
+                }
+              }
+            ],
+            "to_numpy": [
+              {
+                "metadata": {
+                  "usage.hola": 1
+                }
+              }
+            ],
+            "describe": [
+              {
+                "metadata": {
+                  "usage.hola": 1
+                }
+              }
+            ],
+            "sort_index": [
+              {
+                "pos_or_kw_required": {
+                  "axis": {
+                    "type": {
+                      "name": "int"
+                    }
+                  },
+                  "ascending": {
+                    "type": {
+                      "name": "bool"
+                    }
+                  }
+                },
+                "metadata": {
+                  "usage.hola": 1
+                }
+              }
+            ]
+          },
+          "methods": {
+            "head": {
+              "metadata": {
+                "usage.hola": 1
+              }
+            },
+            "tail": {
+              "pos_or_kw_required": {
+                "n": {
+                  "type": {
+                    "name": "int"
+                  }
+                }
+              },
+              "metadata": {
+                "usage.hola": 1
+              }
+            },
+            "to_numpy": {
+              "metadata": {
+                "usage.hola": 1
+              }
+            },
+            "describe": {
+              "metadata": {
+                "usage.hola": 1
+              }
+            },
+            "sort_index": {
+              "pos_or_kw_required": {
+                "axis": {
+                  "type": {
+                    "name": "int"
+                  }
+                },
+                "ascending": {
+                  "type": {
+                    "name": "bool"
+                  }
+                }
+              },
+              "metadata": {
+                "usage.hola": 1
+              }
+            }
+          },
+          "properties": {
+            "index": [
+              {
+                "usage.hola": 1
+              },
+              {
+                "type": "bottom"
+              }
+            ],
+            "columns": [
+              {
+                "usage.hola": 1
+              },
+              {
+                "type": "bottom"
+              }
+            ]
+          }
+        }
+      }
+    },
+    "pandas._libs.tslibs.timestamps": {
+      "classes": {
+        "Timestamp": {
+          "constructor_overloads": [
+            {
+              "pos_or_kw_required": {
+                "ts_input": {
+                  "type": "str",
+                  "options": [
+                    "20130102"
+                  ]
+                }
+              },
+              "metadata": {
+                "usage.hola": 1
+              }
+            }
+          ],
+          "constructor": {
+            "pos_or_kw_required": {
+              "ts_input": {
+                "type": "str",
+                "options": [
+                  "20130102"
+                ]
+              }
+            },
+            "metadata": {
+              "usage.hola": 1
+            }
+          }
+        }
+      }
+    },
+    "pandas.core.arrays.categorical": {
+      "classes": {
+        "Categorical": {
+          "constructor_overloads": [
+            {
+              "pos_or_kw_required": {
+                "values": {
+                  "type": "list",
+                  "item": {
+                    "type": "str",
+                    "options": [
+                      "train",
+                      "test"
+                    ]
+                  }
+                }
+              },
+              "metadata": {
+                "usage.hola": 1
+              }
+            }
+          ],
+          "constructor": {
+            "pos_or_kw_required": {
+              "values": {
+                "type": "list",
+                "item": {
+                  "type": "str",
+                  "options": [
+                    "train",
+                    "test"
+                  ]
+                }
+              }
+            },
+            "metadata": {
+              "usage.hola": 1
+            }
+          }
+        }
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/notebooks/notebook_to_python.py b/notebooks/notebook_to_python.py
new file mode 100644
index 0000000..633b5fc
--- /dev/null
+++ b/notebooks/notebook_to_python.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# This file was taken and modified from
+# https://github.com/data-apis/dataframe-tools/
+import argparse
+import json
+import pathlib
+import os
+
+
+def notebook_to_python(content):
+    """
+    Convert a parsed notebook (the JSON dict) into Python source code.
+
+    Only code cells are kept. A cell's `source` may be a string or a list of
+    strings; per the notebook format a list is joined with '' (each entry
+    carries its own newline), so both forms produce the same lines and no
+    blank line is inserted after every statement. Lines starting with `!`
+    or `%` (shell escapes, magics) are dropped. `content` is not modified.
+    """
+    result = []
+    for cell in content['cells']:
+        if cell['cell_type'] != 'code':
+            continue
+        source = cell['source']
+        text = source if isinstance(source, str) else ''.join(source)
+        result += [ln for ln in text.split('\n') if not ln.startswith(('!', '%'))]
+    return '\n'.join(result)
+
+
+def process_notebook(path):
+    """
+    Convert the notebook at `path` into `scripts/<name>.py` (relative to the
+    current directory). Content that is not valid JSON is assumed to already
+    be Python source and is copied as-is. Output is written as UTF-8.
+    """
+    script_id = os.path.splitext(os.path.basename(path))[0]
+    with open(path, 'rb') as f:
+        script_content = f.read()
+    try:
+        nb_content = json.loads(script_content)
+    except json.JSONDecodeError:
+        script_as_python = script_content.decode('utf-8')
+    else:
+        script_as_python = notebook_to_python(nb_content)
+    script_dir = pathlib.Path('scripts')
+    script_dir.mkdir(parents=True, exist_ok=True)
+    # Explicit encoding: the platform default may not handle non-ASCII source.
+    with open(script_dir / f'{script_id}.py', 'w', encoding='utf-8') as out:
+        out.write(script_as_python)
+
+
+def main(directory):
+    """Convert every `*.ipynb` file under `directory` with `process_notebook`,
+    skipping Jupyter's `.ipynb_checkpoints` folders (pruned from the walk)."""
+    for root, dirs, files in os.walk(directory):
+        # Assigning to dirs[:] edits the list os.walk recurses into.
+        dirs[:] = [d for d in dirs if d != '.ipynb_checkpoints']
+        for f in files:
+            print(f)
+            if f.endswith('.ipynb'):
+                print('*****')
+                process_notebook(os.path.join(root, f))
+
+
+if __name__ == '__main__':
+    # Command-line entry point: a single positional argument, the folder
+    # that holds the notebooks to convert.
+    cli = argparse.ArgumentParser(description='notebook loader')
+    cli.add_argument('dir', type=str, help='Directory of the notebooks')
+    main(cli.parse_args().dir)
\ No newline at end of file
diff --git a/notebooks/output/example.jsonl b/notebooks/output/example.jsonl
new file mode 100644
index 0000000..2eb9541
--- /dev/null
+++ b/notebooks/output/example.jsonl
@@ -0,0 +1,18 @@
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:3","function":{"t":"type","v":{"module":"pandas.core.series","name":"Series"}},"bound_params":{"pos_or_kw":[["data",[{"t":"int"},{"t":"int"},{"t":"int"},{"t":"float"},{"t":"int"},{"t":"int"}]]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:4","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"date_range"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:4","function":{"t":"function","v":{"module":"pandas.core.indexes.datetimes","name":"date_range"}},"bound_params":{"pos_or_kw":[["start","20130101"],["periods",{"t":"int"}]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:5","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"DataFrame"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:5","function":{"t":"type","v":{"module":"pandas.core.frame","name":"DataFrame"}},"bound_params":{"pos_or_kw":[["data",{"t":{"module":"numpy","name":"ndarray"},"v":{"dtype":"float64"}}],["index",{"t":{"module":"pandas.core.indexes.datetimes","name":"DatetimeIndex"}}],["columns",["A","B","C","D"]]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:12","function":{"t":"type","v":{"module":"pandas._libs.tslibs.timestamps","name":"Timestamp"}},"bound_params":{"pos_or_kw":[["ts_input","20130102"]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:14","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"Series"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:14","function":{"t":"type","v":{"module":"pandas.core.series","name":"Series"}},"bound_params":{"pos_or_kw":[["data",{"t":"int"}],["index",[{"t":"int"},{"t":"int"},{"t":"int"},{"t":"int"}]],["dtype","float32"]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:18","function":{"t":"type","v":{"module":"pandas.core.arrays.categorical","name":"Categorical"}},"bound_params":{"pos_or_kw":[["values",["test","train","test","train"]]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:6","function":{"t":"type","v":{"module":"pandas.core.frame","name":"DataFrame"}},"bound_params":{"pos_or_kw":[["data",{"t":"dict","v":[["A",{"t":"float"}],["B",{"t":{"module":"pandas._libs.tslibs.timestamps","name":"Timestamp"}}],["C",{"t":{"module":"pandas.core.series","name":"Series"}}],["D",{"t":{"module":"numpy","name":"ndarray"},"v":{"dtype":"int32"}}],["E",{"t":{"module":"pandas.core.arrays.categorical","name":"Categorical"}}],["F","foo"]]}]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:25","function":{"t":"method","v":{"self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"name":"head"}},"bound_params":{}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:26","function":{"t":"method","v":{"self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"name":"tail"}},"bound_params":{"pos_or_kw":[["n",{"t":"int"}]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:27","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"index"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:28","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"columns"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:29","function":{"t":"method","v":{"self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"name":"to_numpy"}},"bound_params":{}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:30","function":{"t":"method","v":{"self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"name":"describe"}},"bound_params":{}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:31","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"sort_index"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:31","function":{"t":"method","v":{"self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"name":"sort_index"}},"bound_params":{"pos_or_kw":[["axis",{"t":"int"}],["ascending",{"t":"bool"}]]}}
diff --git a/notebooks/run_record_api.sh b/notebooks/run_record_api.sh
new file mode 100644
index 0000000..580c899
--- /dev/null
+++ b/notebooks/run_record_api.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# Runs the record_api pipeline for every script in scripts/ that mentions
+# pandas. Run it from the notebooks directory; all paths are relative to it.
+#   1. python -m record_api             scripts/<id>.py    -> output/<id>.jsonl
+#   2. python -m record_api.line_counts output/<id>.jsonl  -> grouped/<id>.jsonl
+#   3. python -m record_api.infer_apis  grouped/<id>.jsonl -> infer_apis/<id>.json
+# Recordings already present in output/ are not re-recorded.
+set -e
+
+TOTAL=$(grep -l pandas scripts/*.py | wc -l)
+TOTAL_RUN=0
+TOTAL_FAILED=0
+TOTAL_IGNORED=0  # never incremented in this script; always reported as 0
+TOTAL_PREVIOUS=0
+
+# -p: the directories survive between runs, so they may already exist.
+mkdir -p output grouped infer_apis
+
+echo "Trying to run $TOTAL scripts"
+
+# Globs are looped over directly (no word splitting of file names); grep -q
+# also fails on an unmatched glob pattern, so no separate existence test.
+for FNAME in scripts/*.py; do
+    grep -q pandas "$FNAME" 2>/dev/null || continue
+    echo "Running: $FNAME..."
+
+    SCRIPT_ID=$(basename "$FNAME" .py)
+    OUTPUT_FNAME=output/$SCRIPT_ID.jsonl
+    if [[ -e $OUTPUT_FNAME ]]; then
+        TOTAL_PREVIOUS=$((TOTAL_PREVIOUS + 1))
+        continue
+    fi
+
+    TOTAL_RUN=$((TOTAL_RUN + 1))
+
+    export PYTHON_RECORD_API_TO_MODULES="pandas"
+    export PYTHON_RECORD_API_FROM_MODULES="$SCRIPT_ID"
+    export PYTHON_RECORD_API_OUTPUT_FILE="../$OUTPUT_FNAME"
+    # The subshell confines the cd to this one command, so the next iteration
+    # starts from the notebooks directory again. Testing the status in `if`
+    # keeps a failing script from aborting the run under `set -e`.
+    if ! (cd scripts && python -m record_api); then
+        TOTAL_FAILED=$((TOTAL_FAILED + 1))
+    fi
+
+    echo "Number of scripts: $TOTAL"
+    echo "Number of scripts previously run: $TOTAL_PREVIOUS"
+    echo "Number of scripts run: $TOTAL_RUN"
+    echo "Number of scripts failed: $TOTAL_FAILED"
+    echo "Number of scripts ignored (missing data source): $TOTAL_IGNORED"
+    echo "*********************************************************************"
+done
+
+# NOTE(review): INPUT is now the path of the .jsonl file; the original exported
+# the bare script id, which names no file in output/ or grouped/ - confirm.
+for FNAME in output/*.jsonl; do
+    grep -q pandas "$FNAME" 2>/dev/null || continue
+    echo "Running: $FNAME..."
+    export PYTHON_RECORD_API_INPUT="$FNAME"
+    export PYTHON_RECORD_API_OUTPUT="grouped/$(basename "$FNAME")"
+    python -m record_api.line_counts
+done
+
+# Step 3 reads what step 2 wrote (the original globbed the empty infer_apis/).
+for FNAME in grouped/*.jsonl; do
+    grep -q pandas "$FNAME" 2>/dev/null || continue
+    echo "Running: $FNAME..."
+    SCRIPT_ID=$(basename "$FNAME" .jsonl)
+    export PYTHON_RECORD_API_INPUT="$FNAME"
+    export PYTHON_RECORD_API_OUTPUT="infer_apis/$SCRIPT_ID.json"
+    export PYTHON_RECORD_API_LABEL="$SCRIPT_ID"
+    export PYTHON_RECORD_API_MODULES=pandas
+    python -m record_api.infer_apis
+done
\ No newline at end of file
diff --git a/notebooks/scripts/example.py b/notebooks/scripts/example.py
new file mode 100644
index 0000000..c5cc4d7
--- /dev/null
+++ b/notebooks/scripts/example.py
@@ -0,0 +1,31 @@
+import numpy as np
+import pandas as pd
+s = pd.Series([1, 3, 5, np.nan, 6, 8])  # six values, the fourth is NaN
+dates = pd.date_range("20130101", periods=6)  # six periods starting at 20130101
+df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))  # 6x4 random values
+df2 = pd.DataFrame(  # built from a dict with keys "A" to "F"
+
+        {
+
+            "A": 1.0,
+
+            "B": pd.Timestamp("20130102"),
+
+            "C": pd.Series(1, index=list(range(4)), dtype="float32"),
+
+            "D": np.array([3] * 4, dtype="int32"),
+
+            "E": pd.Categorical(["test", "train", "test", "train"]),
+
+            "F": "foo",
+
+        }
+
+)
+df.head()  # from here on: bare expressions whose results are discarded in a script
+df.tail(3)
+df.index
+df.columns
+df.to_numpy()
+df.describe()
+df.sort_index(axis=1, ascending=False)
\ No newline at end of file

	A	B	C	D
2013-01-01	-0.328603	-0.366380	-0.619592	-2.822423
2013-01-02	-0.427067	0.317282	-0.938094	-0.148115
2013-01-03	-3.012013	-0.535066	0.790945	0.209057
2013-01-04	0.762696	0.243109	1.879514	1.108437
2013-01-05	-0.402783	-0.876177	-0.093985	0.367029
	A	B	C	D
count	6.000000	6.000000	6.000000	6.000000
mean	-0.742844	-0.063360	0.371627	-0.415373
std	1.256713	0.636332	1.101702	1.401058
min	-3.012013	-0.876177	-0.938094	-2.822423
25%	-0.893736	-0.492894	-0.488190	-0.941697
50%	-0.414925	-0.061635	0.348480	0.030471
75%	-0.347148	0.298739	1.105968	0.327536
max	0.762696	0.837073	1.879514	1.108437