diff --git a/notebooks/README.md b/notebooks/README.md
new file mode 100644
index 0000000..09d7c2f
--- /dev/null
+++ b/notebooks/README.md
@@ -0,0 +1,30 @@
+# Running the record api tooling on notebooks
+
+First, clone the record api repo:
+
+```
+git clone https://github.com/data-apis/python-record-api.git
+```
+
+Then create a new conda env in which to install the notebooks' dependencies and the record api tooling:
+
+```
+conda create -n record-api python=3.8
+conda activate record-api
+cd python-record-api
+pip install record_api
+```
+
+There is an example notebook under `notebooks/data`. Notebooks have to be converted
+to python files before the record api tooling can work on them:
+
+```
+cd notebooks
+python notebook_to_python.py data
+```
+
+A new folder named `scripts` will be created under the `notebooks` directory, containing
+all the notebooks converted into python files.
+
+Then run `run_record_api.sh` from the `notebooks` directory to generate the inferred API calls for each notebook.
+The output is written to the `infer_apis` directory, as one json file per notebook.
diff --git a/notebooks/data/example.ipynb b/notebooks/data/example.ipynb
new file mode 100644
index 0000000..c29d8df
--- /dev/null
+++ b/notebooks/data/example.ipynb
@@ -0,0 +1,538 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "s = pd.Series([1, 3, 5, np.nan, 6, 8])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dates = pd.date_range(\"20130101\", periods=6)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list(\"ABCD\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df2 = pd.DataFrame(\n",
+ " {\n",
+ " \"A\": 1.0,\n",
+ " \"B\": pd.Timestamp(\"20130102\"),\n",
+ " \"C\": pd.Series(1, index=list(range(4)), dtype=\"float32\"),\n",
+ " \"D\": np.array([3] * 4, dtype=\"int32\"),\n",
+ " \"E\": pd.Categorical([\"test\", \"train\", \"test\", \"train\"]),\n",
+ " \"F\": \"foo\",\n",
+ " }\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " A | \n",
+ " B | \n",
+ " C | \n",
+ " D | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013-01-01 | \n",
+ " -0.328603 | \n",
+ " -0.366380 | \n",
+ " -0.619592 | \n",
+ " -2.822423 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-02 | \n",
+ " -0.427067 | \n",
+ " 0.317282 | \n",
+ " -0.938094 | \n",
+ " -0.148115 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-03 | \n",
+ " -3.012013 | \n",
+ " -0.535066 | \n",
+ " 0.790945 | \n",
+ " 0.209057 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-04 | \n",
+ " 0.762696 | \n",
+ " 0.243109 | \n",
+ " 1.879514 | \n",
+ " 1.108437 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-05 | \n",
+ " -0.402783 | \n",
+ " -0.876177 | \n",
+ " -0.093985 | \n",
+ " 0.367029 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " A B C D\n",
+ "2013-01-01 -0.328603 -0.366380 -0.619592 -2.822423\n",
+ "2013-01-02 -0.427067 0.317282 -0.938094 -0.148115\n",
+ "2013-01-03 -3.012013 -0.535066 0.790945 0.209057\n",
+ "2013-01-04 0.762696 0.243109 1.879514 1.108437\n",
+ "2013-01-05 -0.402783 -0.876177 -0.093985 0.367029"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " A | \n",
+ " B | \n",
+ " C | \n",
+ " D | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013-01-04 | \n",
+ " 0.762696 | \n",
+ " 0.243109 | \n",
+ " 1.879514 | \n",
+ " 1.108437 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-05 | \n",
+ " -0.402783 | \n",
+ " -0.876177 | \n",
+ " -0.093985 | \n",
+ " 0.367029 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-06 | \n",
+ " -1.049293 | \n",
+ " 0.837073 | \n",
+ " 1.210975 | \n",
+ " -1.206224 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " A B C D\n",
+ "2013-01-04 0.762696 0.243109 1.879514 1.108437\n",
+ "2013-01-05 -0.402783 -0.876177 -0.093985 0.367029\n",
+ "2013-01-06 -1.049293 0.837073 1.210975 -1.206224"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.tail(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',\n",
+ " '2013-01-05', '2013-01-06'],\n",
+ " dtype='datetime64[ns]', freq='D')"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.index"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['A', 'B', 'C', 'D'], dtype='object')"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[-0.32860292, -0.36637983, -0.61959229, -2.82242287],\n",
+ " [-0.42706726, 0.3172823 , -0.93809362, -0.14811456],\n",
+ " [-3.01201345, -0.53506589, 0.79094519, 0.20905686],\n",
+ " [ 0.76269556, 0.24310942, 1.87951383, 1.10843652],\n",
+ " [-0.40278323, -0.87617667, -0.09398487, 0.3670287 ],\n",
+ " [-1.04929264, 0.8370728 , 1.21097545, -1.20622421]])"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.to_numpy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " A | \n",
+ " B | \n",
+ " C | \n",
+ " D | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 6.000000 | \n",
+ " 6.000000 | \n",
+ " 6.000000 | \n",
+ " 6.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " -0.742844 | \n",
+ " -0.063360 | \n",
+ " 0.371627 | \n",
+ " -0.415373 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 1.256713 | \n",
+ " 0.636332 | \n",
+ " 1.101702 | \n",
+ " 1.401058 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " -3.012013 | \n",
+ " -0.876177 | \n",
+ " -0.938094 | \n",
+ " -2.822423 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " -0.893736 | \n",
+ " -0.492894 | \n",
+ " -0.488190 | \n",
+ " -0.941697 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " -0.414925 | \n",
+ " -0.061635 | \n",
+ " 0.348480 | \n",
+ " 0.030471 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " -0.347148 | \n",
+ " 0.298739 | \n",
+ " 1.105968 | \n",
+ " 0.327536 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 0.762696 | \n",
+ " 0.837073 | \n",
+ " 1.879514 | \n",
+ " 1.108437 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " A B C D\n",
+ "count 6.000000 6.000000 6.000000 6.000000\n",
+ "mean -0.742844 -0.063360 0.371627 -0.415373\n",
+ "std 1.256713 0.636332 1.101702 1.401058\n",
+ "min -3.012013 -0.876177 -0.938094 -2.822423\n",
+ "25% -0.893736 -0.492894 -0.488190 -0.941697\n",
+ "50% -0.414925 -0.061635 0.348480 0.030471\n",
+ "75% -0.347148 0.298739 1.105968 0.327536\n",
+ "max 0.762696 0.837073 1.879514 1.108437"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " D | \n",
+ " C | \n",
+ " B | \n",
+ " A | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2013-01-01 | \n",
+ " -2.822423 | \n",
+ " -0.619592 | \n",
+ " -0.366380 | \n",
+ " -0.328603 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-02 | \n",
+ " -0.148115 | \n",
+ " -0.938094 | \n",
+ " 0.317282 | \n",
+ " -0.427067 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-03 | \n",
+ " 0.209057 | \n",
+ " 0.790945 | \n",
+ " -0.535066 | \n",
+ " -3.012013 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-04 | \n",
+ " 1.108437 | \n",
+ " 1.879514 | \n",
+ " 0.243109 | \n",
+ " 0.762696 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-05 | \n",
+ " 0.367029 | \n",
+ " -0.093985 | \n",
+ " -0.876177 | \n",
+ " -0.402783 | \n",
+ "
\n",
+ " \n",
+ " 2013-01-06 | \n",
+ " -1.206224 | \n",
+ " 1.210975 | \n",
+ " 0.837073 | \n",
+ " -1.049293 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " D C B A\n",
+ "2013-01-01 -2.822423 -0.619592 -0.366380 -0.328603\n",
+ "2013-01-02 -0.148115 -0.938094 0.317282 -0.427067\n",
+ "2013-01-03 0.209057 0.790945 -0.535066 -3.012013\n",
+ "2013-01-04 1.108437 1.879514 0.243109 0.762696\n",
+ "2013-01-05 0.367029 -0.093985 -0.876177 -0.402783\n",
+ "2013-01-06 -1.206224 1.210975 0.837073 -1.049293"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.sort_index(axis=1, ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/grouped/example.json b/notebooks/grouped/example.json
new file mode 100644
index 0000000..073c39b
--- /dev/null
+++ b/notebooks/grouped/example.json
@@ -0,0 +1,18 @@
+{"bound_params":{"pos_or_kw":[["data",[{"t":"int"},{"t":"int"},{"t":"int"},{"t":"float"},{"t":"int"},{"t":"int"}]]]},"function":{"t":"type","v":{"module":"pandas.core.series","name":"Series"}},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"date_range"]},"n":1}
+{"bound_params":{"pos_or_kw":[["start","20130101"],["periods",{"t":"int"}]]},"function":{"t":"function","v":{"module":"pandas.core.indexes.datetimes","name":"date_range"}},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"DataFrame"]},"n":1}
+{"bound_params":{"pos_or_kw":[["data",{"t":{"module":"numpy","name":"ndarray"},"v":{"dtype":"float64"}}],["index",{"t":{"module":"pandas.core.indexes.datetimes","name":"DatetimeIndex"}}],["columns",["A","B","C","D"]]]},"function":{"t":"type","v":{"module":"pandas.core.frame","name":"DataFrame"}},"n":1}
+{"bound_params":{"pos_or_kw":[["ts_input","20130102"]]},"function":{"t":"type","v":{"module":"pandas._libs.tslibs.timestamps","name":"Timestamp"}},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"Series"]},"n":1}
+{"bound_params":{"pos_or_kw":[["data",{"t":"int"}],["index",[{"t":"int"},{"t":"int"},{"t":"int"},{"t":"int"}]],["dtype","float32"]]},"function":{"t":"type","v":{"module":"pandas.core.series","name":"Series"}},"n":1}
+{"bound_params":{"pos_or_kw":[["values",["test","train","test","train"]]]},"function":{"t":"type","v":{"module":"pandas.core.arrays.categorical","name":"Categorical"}},"n":1}
+{"bound_params":{"pos_or_kw":[["data",{"t":"dict","v":[["A",{"t":"float"}],["B",{"t":{"module":"pandas._libs.tslibs.timestamps","name":"Timestamp"}}],["C",{"t":{"module":"pandas.core.series","name":"Series"}}],["D",{"t":{"module":"numpy","name":"ndarray"},"v":{"dtype":"int32"}}],["E",{"t":{"module":"pandas.core.arrays.categorical","name":"Categorical"}}],["F","foo"]]}]]},"function":{"t":"type","v":{"module":"pandas.core.frame","name":"DataFrame"}},"n":1}
+{"bound_params":{},"function":{"t":"method","v":{"name":"head","self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}}}},"n":1}
+{"bound_params":{"pos_or_kw":[["n",{"t":"int"}]]},"function":{"t":"method","v":{"name":"tail","self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}}}},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"index"]},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"columns"]},"n":1}
+{"bound_params":{},"function":{"t":"method","v":{"name":"to_numpy","self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}}}},"n":1}
+{"bound_params":{},"function":{"t":"method","v":{"name":"describe","self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}}}},"n":1}
+{"function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"sort_index"]},"n":1}
+{"bound_params":{"pos_or_kw":[["axis",{"t":"int"}],["ascending",{"t":"bool"}]]},"function":{"t":"method","v":{"name":"sort_index","self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}}}},"n":1}
diff --git a/notebooks/infer_apis/example.json b/notebooks/infer_apis/example.json
new file mode 100644
index 0000000..9eef114
--- /dev/null
+++ b/notebooks/infer_apis/example.json
@@ -0,0 +1,443 @@
+{
+ "modules": {
+ "pandas.core.series": {
+ "classes": {
+ "Series": {
+ "constructor_overloads": [
+ {
+ "pos_or_kw_required": {
+ "data": {
+ "type": "list",
+ "item": {
+ "type": "union",
+ "options": [
+ {
+ "type": {
+ "name": "int"
+ }
+ },
+ {
+ "type": {
+ "name": "float"
+ }
+ }
+ ]
+ }
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ },
+ {
+ "pos_or_kw_required": {
+ "data": {
+ "type": {
+ "name": "int"
+ }
+ },
+ "index": {
+ "type": "list",
+ "item": {
+ "type": {
+ "name": "int"
+ }
+ }
+ },
+ "dtype": {
+ "type": "str",
+ "options": [
+ "float32"
+ ]
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ ],
+ "constructor": {
+ "pos_or_kw_required": {
+ "data": {
+ "type": "union",
+ "options": [
+ {
+ "type": {
+ "name": "int"
+ }
+ },
+ {
+ "type": "list",
+ "item": {
+ "type": "union",
+ "options": [
+ {
+ "type": {
+ "name": "float"
+ }
+ },
+ {
+ "type": {
+ "name": "int"
+ }
+ }
+ ]
+ }
+ }
+ ]
+ }
+ },
+ "pos_or_kw_optional": {
+ "index": {
+ "type": "list",
+ "item": {
+ "type": {
+ "name": "int"
+ }
+ }
+ },
+ "dtype": {
+ "type": "str",
+ "options": [
+ "float32"
+ ]
+ }
+ },
+ "pos_or_kw_optional_ordering": [
+ [
+ "index",
+ "dtype"
+ ]
+ ],
+ "metadata": {
+ "usage.hola": 2
+ }
+ }
+ }
+ }
+ },
+ "pandas": {
+ "properties": {
+ "date_range": [
+ {
+ "usage.hola": 1
+ },
+ {
+ "type": "bottom"
+ }
+ ],
+ "DataFrame": [
+ {
+ "usage.hola": 1
+ },
+ {
+ "type": "bottom"
+ }
+ ],
+ "Series": [
+ {
+ "usage.hola": 1
+ },
+ {
+ "type": "bottom"
+ }
+ ]
+ }
+ },
+ "pandas.core.indexes.datetimes": {
+ "function_overloads": {
+ "date_range": [
+ {
+ "pos_or_kw_required": {
+ "start": {
+ "type": "str",
+ "options": [
+ "20130101"
+ ]
+ },
+ "periods": {
+ "type": {
+ "name": "int"
+ }
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ ]
+ },
+ "functions": {
+ "date_range": {
+ "pos_or_kw_required": {
+ "start": {
+ "type": "str",
+ "options": [
+ "20130101"
+ ]
+ },
+ "periods": {
+ "type": {
+ "name": "int"
+ }
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ }
+ },
+ "pandas.core.frame": {
+ "classes": {
+ "DataFrame": {
+ "constructor_overloads": [
+ {
+ "pos_or_kw_required": {
+ "data": {
+ "type": {
+ "module": "numpy",
+ "name": "ndarray"
+ }
+ },
+ "index": {
+ "type": {
+ "module": "pandas.core.indexes.datetimes",
+ "name": "DatetimeIndex"
+ }
+ },
+ "columns": {
+ "type": "list",
+ "item": {
+ "type": "str",
+ "options": [
+ "D",
+ "C",
+ "B",
+ "A"
+ ]
+ }
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ },
+ {
+ "pos_or_kw_required": {
+ "data": {
+ "type": "dict",
+ "key": {
+ "type": "str"
+ },
+ "value": {
+ "type": "object"
+ }
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ ],
+ "method_overloads": {
+ "head": [
+ {
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ ],
+ "tail": [
+ {
+ "pos_or_kw_required": {
+ "n": {
+ "type": {
+ "name": "int"
+ }
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ ],
+ "to_numpy": [
+ {
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ ],
+ "describe": [
+ {
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ ],
+ "sort_index": [
+ {
+ "pos_or_kw_required": {
+ "axis": {
+ "type": {
+ "name": "int"
+ }
+ },
+ "ascending": {
+ "type": {
+ "name": "bool"
+ }
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ ]
+ },
+ "methods": {
+ "head": {
+ "metadata": {
+ "usage.hola": 1
+ }
+ },
+ "tail": {
+ "pos_or_kw_required": {
+ "n": {
+ "type": {
+ "name": "int"
+ }
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ },
+ "to_numpy": {
+ "metadata": {
+ "usage.hola": 1
+ }
+ },
+ "describe": {
+ "metadata": {
+ "usage.hola": 1
+ }
+ },
+ "sort_index": {
+ "pos_or_kw_required": {
+ "axis": {
+ "type": {
+ "name": "int"
+ }
+ },
+ "ascending": {
+ "type": {
+ "name": "bool"
+ }
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ },
+ "properties": {
+ "index": [
+ {
+ "usage.hola": 1
+ },
+ {
+ "type": "bottom"
+ }
+ ],
+ "columns": [
+ {
+ "usage.hola": 1
+ },
+ {
+ "type": "bottom"
+ }
+ ]
+ }
+ }
+ }
+ },
+ "pandas._libs.tslibs.timestamps": {
+ "classes": {
+ "Timestamp": {
+ "constructor_overloads": [
+ {
+ "pos_or_kw_required": {
+ "ts_input": {
+ "type": "str",
+ "options": [
+ "20130102"
+ ]
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ ],
+ "constructor": {
+ "pos_or_kw_required": {
+ "ts_input": {
+ "type": "str",
+ "options": [
+ "20130102"
+ ]
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ }
+ }
+ },
+ "pandas.core.arrays.categorical": {
+ "classes": {
+ "Categorical": {
+ "constructor_overloads": [
+ {
+ "pos_or_kw_required": {
+ "values": {
+ "type": "list",
+ "item": {
+ "type": "str",
+ "options": [
+ "train",
+ "test"
+ ]
+ }
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ ],
+ "constructor": {
+ "pos_or_kw_required": {
+ "values": {
+ "type": "list",
+ "item": {
+ "type": "str",
+ "options": [
+ "train",
+ "test"
+ ]
+ }
+ }
+ },
+ "metadata": {
+ "usage.hola": 1
+ }
+ }
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/notebooks/notebook_to_python.py b/notebooks/notebook_to_python.py
new file mode 100644
index 0000000..633b5fc
--- /dev/null
+++ b/notebooks/notebook_to_python.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# This file was taken and modified from
+# https://github.com/data-apis/dataframe-tools/
+import argparse
+import json
+import pathlib
+import os
+
+
+def notebook_to_python(content):
+ """
+ Convert a notebook to a Python script, removing
+ all markdown.
+
+ `content` is the parsed notebook JSON.
+ """
+ result = []
+ for cell in content['cells']:
+ if cell['cell_type'] == 'code':
+ if isinstance(cell['source'], str):
+ cell['source'] = [cell['source']]
+ for line in cell['source']:
+ result += line.split('\n')
+
+ result = [line for line in result if not line.startswith('!') and not line.startswith('%')]
+
+ return '\n'.join(result)
+
+
+def process_notebook(path):
+ """
+ Process the notebook in the given path to convert it into a python file.
+ """
+ script_id = os.path.splitext(os.path.basename(path))[0]
+ with open(path, 'rb') as f:
+ script_content = f.read()
+ try:
+ nb_content = json.loads(script_content)
+ except json.JSONDecodeError:
+ # not a notebook, we assume it's already a Python file
+ script_as_python = script_content.decode('utf-8')
+ else:
+ script_as_python = notebook_to_python(nb_content)
+
+ script_dir = pathlib.Path('scripts')
+ script_dir.mkdir(parents=True, exist_ok=True)
+
+ with open(script_dir / f'{script_id}.py', 'w') as f:
+ f.write(script_as_python)
+
+
+def main(directory):
+ """
+ Take notebooks under the given directory, convert them to python files, and
+ create a directory structure ready to be executed.
+ """
+ for root, dirs, files in os.walk(directory):
+ for f in files:
+ print(f)
+ if f.endswith('ipynb'):
+ print('*****')
+ process_notebook(os.path.join(root, f))
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='notebook loader')
+ parser.add_argument('dir', type=str,
+ help='Directory of the notebooks')
+ args = parser.parse_args()
+ main(args.dir)
\ No newline at end of file
diff --git a/notebooks/output/example.jsonl b/notebooks/output/example.jsonl
new file mode 100644
index 0000000..2eb9541
--- /dev/null
+++ b/notebooks/output/example.jsonl
@@ -0,0 +1,18 @@
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:3","function":{"t":"type","v":{"module":"pandas.core.series","name":"Series"}},"bound_params":{"pos_or_kw":[["data",[{"t":"int"},{"t":"int"},{"t":"int"},{"t":"float"},{"t":"int"},{"t":"int"}]]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:4","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"date_range"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:4","function":{"t":"function","v":{"module":"pandas.core.indexes.datetimes","name":"date_range"}},"bound_params":{"pos_or_kw":[["start","20130101"],["periods",{"t":"int"}]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:5","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"DataFrame"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:5","function":{"t":"type","v":{"module":"pandas.core.frame","name":"DataFrame"}},"bound_params":{"pos_or_kw":[["data",{"t":{"module":"numpy","name":"ndarray"},"v":{"dtype":"float64"}}],["index",{"t":{"module":"pandas.core.indexes.datetimes","name":"DatetimeIndex"}}],["columns",["A","B","C","D"]]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:12","function":{"t":"type","v":{"module":"pandas._libs.tslibs.timestamps","name":"Timestamp"}},"bound_params":{"pos_or_kw":[["ts_input","20130102"]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:14","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":"module","v":"pandas"},"Series"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:14","function":{"t":"type","v":{"module":"pandas.core.series","name":"Series"}},"bound_params":{"pos_or_kw":[["data",{"t":"int"}],["index",[{"t":"int"},{"t":"int"},{"t":"int"},{"t":"int"}]],["dtype","float32"]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:18","function":{"t":"type","v":{"module":"pandas.core.arrays.categorical","name":"Categorical"}},"bound_params":{"pos_or_kw":[["values",["test","train","test","train"]]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:6","function":{"t":"type","v":{"module":"pandas.core.frame","name":"DataFrame"}},"bound_params":{"pos_or_kw":[["data",{"t":"dict","v":[["A",{"t":"float"}],["B",{"t":{"module":"pandas._libs.tslibs.timestamps","name":"Timestamp"}}],["C",{"t":{"module":"pandas.core.series","name":"Series"}}],["D",{"t":{"module":"numpy","name":"ndarray"},"v":{"dtype":"int32"}}],["E",{"t":{"module":"pandas.core.arrays.categorical","name":"Categorical"}}],["F","foo"]]}]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:25","function":{"t":"method","v":{"self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"name":"head"}},"bound_params":{}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:26","function":{"t":"method","v":{"self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"name":"tail"}},"bound_params":{"pos_or_kw":[["n",{"t":"int"}]]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:27","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"index"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:28","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"columns"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:29","function":{"t":"method","v":{"self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"name":"to_numpy"}},"bound_params":{}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:30","function":{"t":"method","v":{"self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"name":"describe"}},"bound_params":{}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:31","function":{"t":"builtin_function_or_method","v":"getattr"},"params":{"args":[{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"sort_index"]}}
+{"location":"/Users/tefa/Documents/python-record-api/notebooks/scripts/example.py:31","function":{"t":"method","v":{"self":{"t":{"module":"pandas.core.frame","name":"DataFrame"}},"name":"sort_index"}},"bound_params":{"pos_or_kw":[["axis",{"t":"int"}],["ascending",{"t":"bool"}]]}}
diff --git a/notebooks/run_record_api.sh b/notebooks/run_record_api.sh
new file mode 100644
index 0000000..580c899
--- /dev/null
+++ b/notebooks/run_record_api.sh
@@ -0,0 +1,74 @@
+#!/bin/bash -e
+
+TOTAL=$(grep -l pandas scripts/*.py | wc -l)
+TOTAL_RUN=0
+TOTAL_FAILED=0
+TOTAL_IGNORED=0
+TOTAL_PREVIOUS=0
+
+mkdir output
+
+echo "Trying to run $(grep -l pandas scripts/*.py | wc -l) scripts"
+
+for FNAME in $(grep -l pandas scripts/*.py); do
+ echo "Running: $FNAME..."
+
+ SCRIPT_ID=$(basename $FNAME .py)
+ OUTPUT_FNAME=output/$SCRIPT_ID.jsonl
+ if [[ -e $OUTPUT_FNAME ]]; then
+ TOTAL_PREVIOUS=$((TOTAL_PREVIOUS + 1))
+ continue
+ fi
+
+ TOTAL_RUN=$((TOTAL_RUN + 1))
+
+ cd scripts
+ export PYTHON_RECORD_API_TO_MODULES="pandas"
+ export PYTHON_RECORD_API_FROM_MODULES=$SCRIPT_ID
+ export PYTHON_RECORD_API_OUTPUT_FILE=../$OUTPUT_FNAME
+ set +e
+ python -m record_api
+ FAILED=$?
+ FAILED=$((FAILED != 0 ? 1 : 0))
+ TOTAL_FAILED=$((TOTAL_FAILED + FAILED))
+ set -e
+
+ echo "Number of scripts: $TOTAL"
+ echo "Number of scripts previously run: $TOTAL_PREVIOUS"
+ echo "Number of scripts run: $TOTAL_RUN"
+ echo "Number of scripts failed: $TOTAL_FAILED"
+ echo "Number of scripts ignored (missing data source): $TOTAL_IGNORED"
+ echo "*********************************************************************"
+done
+
+cd ..
+mkdir grouped
+
+for FNAME in $(grep -l pandas output/*.jsonl); do
+ echo "Running: $FNAME..."
+
+ SCRIPT_ID=$(basename $FNAME .jsonl)
+ OUTPUT_FNAME=grouped/$SCRIPT_ID.jsonl
+
+ cd output
+ export PYTHON_RECORD_API_OUTPUT=../$OUTPUT_FNAME
+ export PYTHON_RECORD_API_INPUT=$SCRIPT_ID
+ python -m record_api.line_counts
+
+done
+
+cd ..
+mkdir infer_apis
+
+for FNAME in $(grep -l pandas infer_apis/*.jsonl); do
+ echo "Running: $FNAME..."
+
+ SCRIPT_ID=$(basename $FNAME .jsonl)
+ OUTPUT_FNAME=infer_apis/$SCRIPT_ID.json
+
+ export PYTHON_RECORD_API_OUTPUT=../$OUTPUT_FNAME
+ export PYTHON_RECORD_API_INPUT=$SCRIPT_ID
+ export PYTHON_RECORD_API_LABEL=$SCRIPT_ID
+ export PYTHON_RECORD_API_MODULES=pandas
+ python -m record_api.infer_apis
+done
\ No newline at end of file
diff --git a/notebooks/scripts/example.py b/notebooks/scripts/example.py
new file mode 100644
index 0000000..c5cc4d7
--- /dev/null
+++ b/notebooks/scripts/example.py
@@ -0,0 +1,31 @@
+import numpy as np  # script converted from data/example.ipynb by notebook_to_python.py
+import pandas as pd  # output/example.jsonl records calls by line number in this file
+s = pd.Series([1, 3, 5, np.nan, 6, 8])
+dates = pd.date_range("20130101", periods=6)
+df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
+df2 = pd.DataFrame(  # the empty lines inside this call were added by the conversion, not by the notebook
+
+ {
+
+ "A": 1.0,
+
+ "B": pd.Timestamp("20130102"),
+
+ "C": pd.Series(1, index=list(range(4)), dtype="float32"),
+
+ "D": np.array([3] * 4, dtype="int32"),
+
+ "E": pd.Categorical(["test", "train", "test", "train"]),
+
+ "F": "foo",
+
+ }
+
+)
+df.head()  # results of the calls below are discarded here; the notebook showed them as cell outputs
+df.tail(3)
+df.index
+df.columns
+df.to_numpy()
+df.describe()
+df.sort_index(axis=1, ascending=False)
\ No newline at end of file