Skip to content

Commit 39333ba

Browse files
committed
Add nycflights dataset
1 parent 03502b1 commit 39333ba

File tree

6 files changed

+336787
-1
lines changed

6 files changed

+336787
-1
lines changed

data_raw/nycflights.csv

Lines changed: 336777 additions & 0 deletions
Large diffs are not rendered by default.

data_raw/nycflights.ddb

7.01 MB
Binary file not shown.

data_raw/x-02-duckdb.qmd

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ tbl_dates_times_text = pl.DataFrame(
2121
)
2222
small_table = pb.load_dataset(dataset="small_table", tbl_type="polars")
2323
game_revenue = pb.load_dataset(dataset="game_revenue", tbl_type="polars")
24+
nycflights = pb.load_dataset(dataset="nycflights", tbl_type="polars")
2425
```
2526

2627

@@ -59,3 +60,10 @@ with duckdb.connect(database="game_revenue.ddb", read_only=False) as con:
5960
CREATE TABLE IF NOT EXISTS 'game_revenue' AS SELECT * FROM game_revenue;
6061
""")
6162
```
63+
64+
```{python}
65+
with duckdb.connect(database="nycflights.ddb", read_only=False) as con:
66+
con.execute(f"""
67+
CREATE TABLE IF NOT EXISTS 'nycflights' AS SELECT * FROM nycflights;
68+
""")
69+
```

pointblank/data/nycflights-duckdb.zip

5.05 MB
Binary file not shown.

pointblank/data/nycflights.zip

7.47 MB
Binary file not shown.

pointblank/validate.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ def load_dataset(
202202
"""
203203

204204
# Raise an error if the dataset is not from the list of provided datasets
205-
if dataset not in ["small_table", "game_revenue"]:
205+
if dataset not in ["small_table", "game_revenue", "nycflights"]:
206206
raise ValueError(
207207
f"The dataset name `{dataset}` is not valid. Choose one of the following:\n"
208208
"- `small_table`\n"
@@ -245,6 +245,7 @@ def load_dataset(
245245
parse_date_columns = {
246246
"small_table": ["date_time", "date"],
247247
"game_revenue": ["session_start", "time", "start_day"],
248+
"nycflights": [],
248249
}
249250

250251
dataset = pd.read_csv(data_path, parse_dates=parse_date_columns[dataset])

0 commit comments

Comments
 (0)