Skip to content

Commit 8a58ad2

Browse files
committed
feat: add dbt project from other repository
1 parent db4f409 commit 8a58ad2

File tree

1,024 files changed

+2061061
-1
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,024 files changed

+2061061
-1
lines changed

.gitignore

+175-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,178 @@ dist/
33
docs/.observablehq/cache/
44
node_modules/
55
yarn-error.log
6-
.vscode/
6+
.vscode/
7+
8+
# DuckDB
9+
*.duckdb
10+
*.duckdb.tmp/
11+
*.duckdb.wal
12+
13+
.gitkeep
14+
15+
# macOS
16+
.DS_Store
17+
18+
# Venv
19+
.venv/
20+
21+
# Byte-compiled / optimized / DLL files
22+
__pycache__/
23+
*.py[cod]
24+
*$py.class
25+
26+
# C extensions
27+
*.so
28+
29+
# Distribution / packaging
30+
.Python
31+
build/
32+
develop-eggs/
33+
dist/
34+
downloads/
35+
eggs/
36+
.eggs/
37+
lib/
38+
lib64/
39+
parts/
40+
sdist/
41+
var/
42+
wheels/
43+
share/python-wheels/
44+
*.egg-info/
45+
.installed.cfg
46+
*.egg
47+
MANIFEST
48+
49+
# PyInstaller
50+
# Usually these files are written by a python script from a template
51+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
52+
*.manifest
53+
*.spec
54+
55+
# Installer logs
56+
pip-log.txt
57+
pip-delete-this-directory.txt
58+
59+
# Unit test / coverage reports
60+
htmlcov/
61+
.tox/
62+
.nox/
63+
.coverage
64+
.coverage.*
65+
.cache
66+
nosetests.xml
67+
coverage.xml
68+
*.cover
69+
*.py,cover
70+
.hypothesis/
71+
.pytest_cache/
72+
cover/
73+
74+
# Translations
75+
*.mo
76+
*.pot
77+
78+
# Django stuff:
79+
*.log
80+
local_settings.py
81+
db.sqlite3
82+
db.sqlite3-journal
83+
84+
# Flask stuff:
85+
instance/
86+
.webassets-cache
87+
88+
# Scrapy stuff:
89+
.scrapy
90+
91+
# Sphinx documentation
92+
docs/_build/
93+
94+
# PyBuilder
95+
.pybuilder/
96+
target/
97+
98+
# Jupyter Notebook
99+
.ipynb_checkpoints
100+
101+
# IPython
102+
profile_default/
103+
ipython_config.py
104+
105+
# pyenv
106+
# For a library or package, you might want to ignore these files since the code is
107+
# intended to run in multiple environments; otherwise, check them in:
108+
# .python-version
109+
110+
# pipenv
111+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
112+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
113+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
114+
# install all needed dependencies.
115+
#Pipfile.lock
116+
117+
# poetry
118+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
119+
# This is especially recommended for binary packages to ensure reproducibility, and is more
120+
# commonly ignored for libraries.
121+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
122+
#poetry.lock
123+
124+
# pdm
125+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
126+
#pdm.lock
127+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
128+
# in version control.
129+
# https://pdm.fming.dev/#use-with-ide
130+
.pdm.toml
131+
132+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
133+
__pypackages__/
134+
135+
# Celery stuff
136+
celerybeat-schedule
137+
celerybeat.pid
138+
139+
# SageMath parsed files
140+
*.sage.py
141+
142+
# Environments
143+
.env
144+
.venv
145+
env/
146+
venv/
147+
ENV/
148+
env.bak/
149+
venv.bak/
150+
151+
# Spyder project settings
152+
.spyderproject
153+
.spyproject
154+
155+
# Rope project settings
156+
.ropeproject
157+
158+
# mkdocs documentation
159+
/site
160+
161+
# mypy
162+
.mypy_cache/
163+
.dmypy.json
164+
dmypy.json
165+
166+
# Pyre type checker
167+
.pyre/
168+
169+
# pytype static type analyzer
170+
.pytype/
171+
172+
# Cython debug symbols
173+
cython_debug/
174+
175+
# PyCharm
176+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
177+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
178+
# and can be added to the global gitignore or merged into this file. For a more nuclear
179+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
180+
#.idea/

data_processing/.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
2+
target/
3+
dbt_packages/
4+
logs/

data_processing/dbt_project.yml

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Name your project! Project names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models.
name: "american_community_survey"
version: "1.0.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: "american_community_survey"

# Variables that can be changed from the command line using the `--vars` flag:
# example: dbt run --vars 'my_variable: my_value'
vars:
  # The URL of the American Community Survey PUMS data
  public_use_microdata_sample_url: "https://www2.census.gov/programs-surveys/acs/data/pums/2022/1-Year/"
  # The URL of the data dictionary (CSV) for the PUMS data
  public_use_microdata_sample_data_dictionary_url: "https://www2.census.gov/programs-surveys/acs/tech_docs/pums/data_dict/PUMS_Data_Dictionary_2022.csv"
  # The URL of the Public Use Microdata Area shapefiles.
  # Alternative source noted by the original author:
  #   https://www2.census.gov/geo/tiger/TIGER2020/PUMA20/
  microdata_area_shapefile_url: "https://www2.census.gov/geo/tiger/TIGER2010/PUMA5/2010/"
  # NOTE(review): YAML does not expand `~`; presumably whatever reads this
  # variable expands it to the home directory -- confirm.
  output_path: "~/data/american_community_survey"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
analysis-paths: ["analyses"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

# Directories to be removed by `dbt clean`
clean-targets:
  - "target"
  - "dbt_packages"

# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# Keys prefixed with `+` are configs; other keys are resource paths below the
# project's model directory. These settings can be overridden in the individual
# model files using the `{{ config(...) }}` macro.
models:
  american_community_survey:
    # Default for every model in this project: build as a view.
    +materialized: view
    public_use_microdata_sample:
      generated:
        # Tag the models under public_use_microdata_sample/generated/ so they
        # can be selected as a group (e.g. `dbt run --select tag:generated`).
        +tags: "generated"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{#
  Render a SQL expression that maps the industry code in `input_field` to its
  description, using the dict returned by get_industry_mappings_2003_onwards().

  Args:
    input_field: SQL expression (typically a column name) holding the code.
      It is emitted verbatim and compared against string literals.

  Output:
    A `CASE ... END` expression cast to an anonymous ENUM whose members are the
    distinct descriptions. Codes with no mapping fall through to NULL because
    the CASE has no ELSE branch.

  Single quotes in codes and descriptions are doubled so that a value such as
  "Men's clothing" still renders as a valid SQL string literal.
#}
{% macro generate_industry_mapping_2003_onwards_sql(input_field) %}
    {% set industry_mappings = get_industry_mappings_2003_onwards() %}

    CASE {{ input_field }}
        {% for code, description in industry_mappings.items() %}
        WHEN '{{ code | replace("'", "''") }}' THEN '{{ description | replace("'", "''") }}'
        {% endfor %}
    END::ENUM ({% for description in industry_mappings.values() | unique %}'{{ description | replace("'", "''") }}'{{ "," if not loop.last }}{% endfor %})
{% endmacro %}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{#
  Render a SQL expression that maps the industry code in `input_field` to its
  description, using the dict returned by get_industry_mappings_before_2003().

  Args:
    input_field: SQL expression (typically a column name) holding the code.
      It is emitted verbatim and compared against string literals.

  Output:
    A `CASE ... END` expression cast to an anonymous ENUM whose members are the
    distinct descriptions. Codes with no mapping fall through to NULL because
    the CASE has no ELSE branch.

  Single quotes in codes and descriptions are doubled so that a value such as
  "Men's clothing" still renders as a valid SQL string literal.
#}
{% macro generate_industry_mapping_before_2003_sql(input_field) %}
    {% set industry_mappings = get_industry_mappings_before_2003() %}

    CASE {{ input_field }}
        {% for code, description in industry_mappings.items() %}
        WHEN '{{ code | replace("'", "''") }}' THEN '{{ description | replace("'", "''") }}'
        {% endfor %}
    END::ENUM ({% for description in industry_mappings.values() | unique %}'{{ description | replace("'", "''") }}'{{ "," if not loop.last }}{% endfor %})
{% endmacro %}

0 commit comments

Comments
 (0)