-
Notifications
You must be signed in to change notification settings - Fork 469
/
Copy pathconnect_to_postgres.py
185 lines (141 loc) · 4.48 KB
/
connect_to_postgres.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# /// script
# requires-python = ">=3.9"
# dependencies = [
# "duckdb==1.1.1",
# "marimo",
# "polars==1.18.0",
# "pyarrow==18.1.0",
# "requests==2.32.3",
# ]
# ///
import marimo
__generated_with = "0.9.1"
app = marimo.App(width="medium")
@app.cell
def __(mo):
mo.md(
r"""
# Connect to Postgres
You can use marimo's SQL cells to read from and write to Postgres databases.
The first step is to attach a Postgres database, which we do below.
For advanced usage, see [duckdb's documentation](https://duckdb.org/docs/extensions/postgres).
"""
)
return
@app.cell(hide_code=True)
def __():
import marimo as mo
def download_sample_data():
import os
import requests
url = "https://github.com/lerocha/chinook-database/raw/master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite"
filename = "Chinook_Sqlite.sqlite"
if not os.path.exists(filename):
print("Downloading the Chinook database ...")
response = requests.get(url)
with open(filename, "wb") as f:
f.write(response.content)
download_sample_data()
return download_sample_data, mo
@app.cell(hide_code=True)
def __(mo):
mo.accordion(
{
"Tip: Creating SQL Cells": mo.md(
f"""
Create a SQL cell in one of two ways:
1. Click the {mo.icon("lucide:database")} `SQL` button at the **bottom of your notebook**
2. **Right-click** the {mo.icon("lucide:circle-plus")} button to the **left of a cell**, and choose `SQL`.
In the SQL cell, you can query dataframes in your notebook as if
they were tables — just reference them by name.
"""
)
}
)
return
@app.cell
def __():
import os
PASSWORD = os.getenv("PGPASSWORD", "mysecretpassword")
return PASSWORD, os
@app.cell
def __(PASSWORD, mo):
_df = mo.sql(
f"""
-- Boilerplate: detach the database so this cell works when you re-run it
DETACH DATABASE IF EXISTS db;
-- Attach the database; omit READ_ONLY if you want to write to the database.
-- The ATTACH command accepts either a libpq connection string (as below) or a PostgreSQL URI.
-- You can filter to specific schemas, as we do below.
ATTACH 'dbname=postgres user=postgres host=127.0.0.1 password={PASSWORD}' as db (
TYPE POSTGRES, READ_ONLY, SCHEMA 'public'
);
-- View tables in the public schema
SHOW ALL TABLES;
"""
)
return
@app.cell(hide_code=True)
def __(mo):
mo.md(
r"""
Once the database is attached, you can query it with SQL. Note that this involves copying data from Postgres SQL and
executing it in duckdb. See later sections of this example on how to execute queries directly in Postgres.
"""
)
return
@app.cell
def __(db, mo, test_table):
_df = mo.sql(
f"""
-- Query your tables! This assumes a database with schema public and a sample table called test_table.
SELECT * FROM db.public.test_table;
"""
)
return
@app.cell(hide_code=True)
def __(mo):
mo.md(
f"""
You can explore the schemas of all your tables at a glance in the **data sources panel**: click
the {mo.icon("lucide:database")} icon in the left sidebar to open it.
"""
)
return
@app.cell(hide_code=True)
def __(mo):
mo.md(
r"""
## Copy data from Postgres to duckdb
To prevent duckdb from continuously re-reading tables from PostgresSQL, you can copy the PostgresSQL databases into DuckDB. Note that this will consume your system's RAM.
"""
)
return
@app.cell
def __(db, duckdb_table, mo, test_table):
_df = mo.sql(
f"""
CREATE OR REPlACE TABLE duckdb_table AS FROM db.public.test_table;
SELECT * FROM duckdb_table;
"""
)
return (duckdb_table,)
@app.cell(hide_code=True)
def __(mo):
mo.md(
r"""
## Execute queries directly in PostgresSQL
Run queries directly in PostgresSQL using duckdb's `postgres_query` function. In some cases this may be faster than executing queries in duckdb.
"""
)
return
@app.cell
def __(mo):
_df = mo.sql(
f"""
SELECT * FROM postgres_query('db', 'SELECT * FROM test_table');
"""
)
return
if __name__ == "__main__":
app.run()