Skip to content

Commit 8dbacc3

Browse files
committed
basic functionality
0 parents  commit 8dbacc3

File tree

10 files changed

+1179
-0
lines changed

10 files changed

+1179
-0
lines changed

.gitignore

+141
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
*.bak
2+
.gitattributes
3+
.last_checked
4+
.gitconfig
5+
*.bak
6+
*.log
7+
*~
8+
~*
9+
_tmp*
10+
tmp*
11+
tags
12+
13+
# Byte-compiled / optimized / DLL files
14+
__pycache__/
15+
*.py[cod]
16+
*$py.class
17+
18+
# C extensions
19+
*.so
20+
21+
# Distribution / packaging
22+
.Python
23+
env/
24+
build/
25+
develop-eggs/
26+
dist/
27+
downloads/
28+
eggs/
29+
.eggs/
30+
lib/
31+
lib64/
32+
parts/
33+
sdist/
34+
var/
35+
wheels/
36+
*.egg-info/
37+
.installed.cfg
38+
*.egg
39+
40+
# PyInstaller
41+
# Usually these files are written by a python script from a template
42+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
43+
*.manifest
44+
*.spec
45+
46+
# Installer logs
47+
pip-log.txt
48+
pip-delete-this-directory.txt
49+
50+
# Unit test / coverage reports
51+
htmlcov/
52+
.tox/
53+
.coverage
54+
.coverage.*
55+
.cache
56+
nosetests.xml
57+
coverage.xml
58+
*.cover
59+
.hypothesis/
60+
61+
# Translations
62+
*.mo
63+
*.pot
64+
65+
# Django stuff:
66+
*.log
67+
local_settings.py
68+
69+
# Flask stuff:
70+
instance/
71+
.webassets-cache
72+
73+
# Scrapy stuff:
74+
.scrapy
75+
76+
# Sphinx documentation
77+
docs/_build/
78+
79+
# PyBuilder
80+
target/
81+
82+
# Jupyter Notebook
83+
.ipynb_checkpoints
84+
85+
# pyenv
86+
.python-version
87+
88+
# celery beat schedule file
89+
celerybeat-schedule
90+
91+
# SageMath parsed files
92+
*.sage.py
93+
94+
# dotenv
95+
.env
96+
97+
# virtualenv
98+
.venv
99+
venv/
100+
ENV/
101+
102+
# Spyder project settings
103+
.spyderproject
104+
.spyproject
105+
106+
# Rope project settings
107+
.ropeproject
108+
109+
# mkdocs documentation
110+
/site
111+
112+
# mypy
113+
.mypy_cache/
114+
115+
.vscode
116+
*.swp
117+
118+
# osx generated files
119+
.DS_Store
120+
.DS_Store?
121+
.Trashes
122+
ehthumbs.db
123+
Thumbs.db
124+
.idea
125+
126+
# pytest
127+
.pytest_cache
128+
129+
# tools/trust-doc-nbs
130+
docs_src/.last_checked
131+
132+
# symlinks to fastai
133+
docs_src/fastai
134+
tools/fastai
135+
136+
# link checker
137+
checklink/cookies.txt
138+
139+
# .gitconfig is now autogenerated
140+
.gitconfig
141+

README.md

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# `notion-df`: Seamlessly Connecting Notion Database with Pandas DataFrame
2+
3+
*Please Note: This project is currently in the pre-alpha stage. The code is not yet properly documented or tested. Please report any issues you find.*
4+
5+
## Installation
6+
7+
```bash
8+
git clone https://github.com/lolipopshock/notion-df && cd notion-df
9+
pip install -e .
10+
```
11+
12+
## Usage
13+
14+
- Download your Notion table as a pandas DataFrame
15+
```python
16+
import notion_df
17+
df = notion_df.load(notion_database_url, api_key=api_key)
18+
df.head()
19+
```
20+
21+
- Append a local `df` to a Notion database:
22+
23+
```python
24+
import notion_df
25+
notion_df.upload(df, notion_database_url, title="page-title", api_key=api_key)
26+
```
27+
28+
- Upload a local `df` to a newly created database in a Notion page:
29+
30+
```python
31+
import notion_df
32+
notion_df.upload(df, notion_page_url, title="page-title", api_key=api_key)
33+
```
34+
35+
- Tired of typing `api_key=api_key` each time?
36+
37+
```python
38+
import notion_df
39+
notion_df.config(api_key=api_key)
40+
```

requirements.txt

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
notion-client>=0.8.0
2+
pydantic~=1.9.0
3+
pandas
4+
dataclasses

setup.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from setuptools import setup, find_packages
2+
import os
3+
4+
5+
def get_requirements(req_file):
    """Return the requirement specifiers listed in a pip requirements file.

    Blank lines and comment lines (first non-blank character is ``#``) are
    skipped; every other line is returned with surrounding whitespace removed.

    Args:
        req_file: Path to the requirements file.

    Returns:
        A list of requirement strings in file order.
    """
    reqs = []
    with open(req_file, "r") as fp:
        for line in fp:
            entry = line.strip()
            # Strip before testing so that indented comment lines are
            # skipped too instead of being returned as requirements.
            if not entry or entry.startswith("#"):
                continue
            reqs.append(entry)
    return reqs
14+
15+
16+
# Read the version out of the package's __init__.py without importing the
# package itself (importing it would also import notion_df.agent).
# A trick from https://github.com/jina-ai/jina/blob/79b302c93b01689e82cf4b52f46522eb7497c404/setup.py#L20
libinfo_py = os.path.join("src", "notion_df", "__init__.py")
# Use a context manager so the file handle is closed deterministically.
with open(libinfo_py, "r", encoding="utf8") as fp:
    libinfo_content = fp.readlines()
version_line = [
    line.strip() for line in libinfo_content if line.startswith("__version__")
][0]
exec(version_line)  # gives __version__
21+
22+
# Read the long description up front so the README file handle is closed
# deterministically instead of being left open for the garbage collector.
with open("README.md", "r", encoding="utf-8") as readme_fp:
    long_description = readme_fp.read()

setup(
    name="notion-df",
    version=__version__,
    description="Notion-DF: Seamlessly Connecting Notion Database with Pandas DataFrame",
    author="Zejiang Shen",
    author_email="[email protected]",
    license="Apache-2.0",
    url="https://github.com/lolipopshock/notion-df",
    package_dir={"": "src"},
    packages=find_packages("src"),
    long_description=long_description,
    long_description_content_type="text/markdown",
    python_requires=">=3.6",
    install_requires=get_requirements("requirements.txt"),
)

src/notion_df/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Re-export the public API at the package root. `config` is included because
# the README documents calling `notion_df.config(api_key=...)`.
from notion_df.agent import config, load, upload

# NOTE: setup.py reads this line textually, so it must start at column 0.
__version__ = "0.0.1"

src/notion_df/agent.py

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
from typing import List, Dict, Optional
2+
from datetime import datetime
3+
import os
4+
from functools import wraps
5+
6+
from notion_client import Client
7+
from notion_client.helpers import get_id
8+
9+
from notion_df.values import PageProperties, PageProperty
10+
from notion_df.configs import DatabaseSchema, guess_align_schema_for_df
11+
12+
# Module-level default API key. `_load_api_key` returns it when no explicit
# key is passed to it; `None` means no default has been set.
API_KEY = None
NOT_REVERSE_DATAFRAME = -1
# Slice step applied to the dataframe in `upload_to_database`, i.e. whether
# to reverse the dataframe when uploading.
# For some reason, Notion reverses the order of the dataframe rows
# when uploading.
# -1 for reversing, 1 for not reversing.
18+
19+
20+
def config(api_key: str):
    """Set the module-wide default Notion API key.

    The stored key is the one `_load_api_key` falls back to when a call does
    not pass an explicit ``api_key``.

    Args:
        api_key: The Notion integration token to use by default.
    """
    # Without the `global` declaration the assignment below would only bind
    # a function-local name and the module-level default would never change.
    global API_KEY
    API_KEY = api_key
22+
23+
24+
def _load_api_key(api_key: str) -> str:
    """Resolve the Notion API key to use for a call.

    Lookup order: the explicit ``api_key`` argument, then the module-level
    ``API_KEY``, then the ``NOTION_API_KEY`` environment variable.

    Raises:
        ValueError: If none of the three sources provides a key.
    """
    if api_key is not None:
        return api_key

    if API_KEY is not None:
        return API_KEY

    env_key = os.environ.get("NOTION_API_KEY")
    if env_key is not None:
        return env_key

    raise ValueError("No API key provided")
33+
34+
35+
def _is_notion_database(notion_url):
    """Return True when the last path segment of the URL contains ``?v=``.

    The presence of that view query marker is what this module uses to tell
    a database URL apart from a plain page URL.
    """
    last_segment = notion_url.rsplit("/", 1)[-1]
    return "?v=" in last_segment
37+
38+
39+
def use_client(func):
    """Decorator that guarantees ``func`` receives a ``client`` keyword.

    If the caller passes ``client=...``, it is forwarded untouched and is
    never closed here (the caller owns it). Otherwise a ``Client`` is built
    from the key resolved by `_load_api_key` (the ``api_key`` keyword is
    consumed for this), passed to ``func``, and closed afterwards.

    Args:
        func: A callable accepting a ``client`` keyword argument.

    Returns:
        The wrapped callable.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        client = kwargs.pop("client", None)
        if client is not None:
            # Caller-supplied client: forward it and leave its lifetime alone.
            return func(*args, client=client, **kwargs)

        api_key = _load_api_key(kwargs.pop("api_key", None))
        client = Client(auth=api_key)
        try:
            return func(*args, client=client, **kwargs)
        finally:
            # Close the client we created even when `func` raises, so the
            # underlying connection is not leaked on errors.
            client.close()

    return wrapper
55+
56+
57+
@use_client
def load(notion_url: str, *, api_key: str = None, client: Client = None):
    """Download a Notion database as a pandas DataFrame.

    All result pages of the database query are fetched by following the
    ``next_cursor`` returned by the API until ``has_more`` is false.

    Args:
        notion_url: URL of the Notion database (must contain a ``?v=`` view
            marker in its last path segment).
        api_key: Notion API key; consumed by the ``use_client`` decorator when
            no ``client`` is passed.
        client: An existing Notion client; supplied by ``use_client`` when
            omitted.

    Returns:
        The DataFrame built from the database rows, with the database schema
        attached as ``df.schema``.

    Raises:
        ValueError: If ``notion_url`` does not look like a database URL.
    """
    if not _is_notion_database(notion_url):
        # Explicit raise instead of `assert`, which is stripped under -O.
        raise ValueError(f"Not a Notion database URL: {notion_url}")
    database_id = get_id(notion_url)

    pages = []
    start_cursor = None
    while True:
        query_kwargs = {"database_id": database_id}
        if start_cursor is not None:
            query_kwargs["start_cursor"] = start_cursor
        query_results = client.databases.query(**query_kwargs)
        assert query_results["object"] == "list"
        pages.extend(query_results["results"])
        # The API returns results in pages; keep going until it says stop.
        if not query_results.get("has_more"):
            break
        start_cursor = query_results["next_cursor"]

    properties = PageProperties.from_raw(pages)

    retrieve_results = client.databases.retrieve(database_id=database_id)
    schema = DatabaseSchema.from_raw(retrieve_results["properties"])

    df = properties.to_frame()
    # `validate_df_with_schema` looks for this attribute on upload.
    df.schema = schema
    return df
74+
75+
76+
def create_database(
    page_id: str, client: Client, schema: DatabaseSchema, title: str = ""
):
    """Create a new database under the given page and return the response.

    Args:
        page_id: ID of the parent Notion page.
        client: Notion client used to issue the request.
        schema: Schema whose ``query_dict()`` gives the database properties.
        title: Plain-text title of the new database.

    Returns:
        The raw response of ``client.databases.create``.
    """
    parent_ref = {"type": "page_id", "page_id": page_id}
    title_rich_text = [{"type": "text", "text": {"content": title}}]
    response = client.databases.create(
        parent=parent_ref,
        title=title_rich_text,
        properties=schema.query_dict(),
    )
    # Sanity check: the API should hand back a database object.
    assert response["object"] == "database"
    return response
84+
85+
86+
def upload_row_to_database(row, database_id, schema, client):
    """Create one page in the database from a single dataframe row.

    The row is converted with ``PageProperty.from_series(row, schema)`` and
    its ``query_dict()`` is sent as the new page's properties.
    """
    page_property = PageProperty.from_series(row, schema)
    payload = page_property.query_dict()
    client.pages.create(
        parent={"database_id": database_id},
        properties=payload,
    )
92+
93+
94+
def upload_to_database(df, databse_id, schema, client):
    """Upload every row of ``df`` to the database, one page per row.

    Rows are visited in the order given by slicing ``df`` with the
    ``NOT_REVERSE_DATAFRAME`` step.
    """
    ordered_rows = df[::NOT_REVERSE_DATAFRAME]
    for _index, record in ordered_rows.iterrows():
        upload_row_to_database(record, databse_id, schema, client)
97+
98+
99+
def load_database_schema(database_id, client):
    """Retrieve a database and build a ``DatabaseSchema`` from its properties."""
    database = client.databases.retrieve(database_id=database_id)
    raw_properties = database["properties"]
    return DatabaseSchema.from_raw(raw_properties)
103+
104+
105+
def validate_df_with_schema(df, schema):
    """Assert that ``df`` is compatible with ``schema``.

    If ``df`` carries a ``schema`` attribute it must equal ``schema``.
    Otherwise every column name of ``df`` must appear in
    ``schema.configs``.

    Raises:
        AssertionError: If the check fails.
    """
    if hasattr(df, "schema"):
        assert df.schema == schema
        return

    # When DF doesn't have a schema, we just ensure that their
    # column names appear in the schema
    for column in df.columns:
        assert column in schema.configs.keys()
113+
114+
115+
@use_client
def upload(
    df: "pd.DataFrame",
    notion_url: str,
    schema=None,
    mode="a",
    title: str = "",
    *,
    api_key: str = None,
    client: Client = None,
):
    """Upload a DataFrame to Notion.

    If ``notion_url`` points to a database, the rows are appended to it,
    using ``schema`` or, when omitted, the schema loaded from the database.
    Otherwise ``notion_url`` is treated as a page: a schema is guessed from
    ``df`` (any ``schema`` argument is replaced by the guessed one), a new
    database is created in that page, and the rows are uploaded to it.

    Args:
        df: The DataFrame to upload.
        notion_url: URL of a Notion database or of a Notion page.
        schema: Optional schema for an existing database.
        mode: Upload mode; only ``"a"`` / ``"append"`` is implemented.
        title: Title for a newly created database.
        api_key: Notion API key; consumed by the ``use_client`` decorator when
            no ``client`` is passed.
        client: An existing Notion client; supplied by ``use_client`` when
            omitted.

    Returns:
        The URL of the database the rows were uploaded to.

    Raises:
        NotImplementedError: If ``mode`` is not an append mode.
    """
    # Reject unsupported modes before touching the API, so that an invalid
    # call cannot leave a freshly created, empty database behind.
    if mode not in ("a", "append"):
        raise NotImplementedError
        # TODO: clean the current values in the notion database (if any)

    if not _is_notion_database(notion_url):
        df, schema = guess_align_schema_for_df(df)
        database_properties = create_database(
            get_id(notion_url), client, schema, title
        )
        database_id = database_properties["id"]
        notion_url = database_properties["url"]
    else:
        database_id = get_id(notion_url)
        if schema is None:
            schema = load_database_schema(database_id, client)

    # At this stage, we should have the appropriate schema
    assert schema is not None

    validate_df_with_schema(df, schema)

    upload_to_database(df, database_id, schema, client)

    print(f"Your dataframe has been uploaded to the Notion page: {notion_url} .")
    return notion_url

0 commit comments

Comments
 (0)