initial checkin (#1)
cch-k authored Mar 21, 2024
1 parent 4a5853f commit 1165cad
Showing 8 changed files with 530 additions and 0 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/fetch.yaml
@@ -0,0 +1,35 @@
name: Fetch Data

on:
  workflow_dispatch:

defaults:
  run:
    shell: bash

jobs:
  on_schedule:
    name: Scheduled Run

    runs-on: ubuntu-22.04

    steps:
      - name: Checkout source
        uses: actions/checkout@v4
        id: checkout-source

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - uses: 'google-github-actions/auth@v2'
        with:
          credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'

      - name: execute
        run: |
          python src/weather_data_loader.py
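
As committed, the job is titled "Scheduled Run" but the workflow declares only a `workflow_dispatch` trigger, and the execute step runs the loader without first installing the packages pinned in src/requirements.txt. A minimal sketch closing both gaps (the cron cadence is a placeholder, not taken from this commit):

```
on:
  workflow_dispatch:
  schedule:
    - cron: '0 6 * * *'  # placeholder daily cadence; the intended schedule is not in this commit

# ...and an install step ahead of "execute":
      - name: Install dependencies
        run: |
          pip install -r src/requirements.txt
```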
155 changes: 155 additions & 0 deletions .gitignore
@@ -0,0 +1,155 @@
# Byte-compiled / optimized / DLL files
**/__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
# .env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

**/venv/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# locust stats files
false_*.csv
true_*.csv

# IntelliJ
*.iml
*.idea

envs/

# OS files
.DS_Store


google_credential.json
application_default_credentials.json

**/.terraform
terraform/.terraform.lock.hcl
terraform/terraform.tfstate
terraform/terraform.tfstate.backup


21 changes: 21 additions & 0 deletions Makefile
@@ -0,0 +1,21 @@

# Variables
TERRAFORM_DIR := terraform
CURRENT_DIR := $(shell pwd)

.PHONY: list
list:
	@LC_ALL=C $(MAKE) -pRrq -f $(firstword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/(^|\n)# Files(\n|$$)/,/(^|\n)# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | egrep -v -e '^[^[:alnum:]]' -e '^$@$$'

tf_init:
	cd $(TERRAFORM_DIR) && terraform init ; cd $(CURRENT_DIR)

tf_plan:
	cd $(TERRAFORM_DIR) && terraform plan ; cd $(CURRENT_DIR)

tf_apply:
	cd $(TERRAFORM_DIR) && terraform apply ; cd $(CURRENT_DIR)

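Each recipe line runs in its own shell, so the trailing `cd $(CURRENT_DIR)` is effectively a no-op. There is also no teardown target; a minimal sketch of one in the same pattern (hypothetical, not part of this commit):

```
tf_destroy:
	cd $(TERRAFORM_DIR) && terraform destroy ; cd $(CURRENT_DIR)
```

Declaring `.PHONY: tf_init tf_plan tf_apply` as well would guard against a file of the same name shadowing these targets.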

137 changes: 137 additions & 0 deletions note.md
@@ -0,0 +1,137 @@

# make tf_plan output


```
Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
  + create

Terraform will perform the following actions:

  # google_bigquery_dataset.project_dataset will be created
  + resource "google_bigquery_dataset" "project_dataset" {
      + creation_time              = (known after apply)
      + dataset_id                 = "project_dataset"
      + default_collation          = (known after apply)
      + delete_contents_on_destroy = false
      + effective_labels           = (known after apply)
      + etag                       = (known after apply)
      + id                         = (known after apply)
      + is_case_insensitive        = (known after apply)
      + last_modified_time         = (known after apply)
      + location                   = "US"
      + max_time_travel_hours      = (known after apply)
      + project                    = "axial-gist-411121"
      + self_link                  = (known after apply)
      + storage_billing_model      = (known after apply)
      + terraform_labels           = (known after apply)
    }

  # google_storage_bucket.project-bucket will be created
  + resource "google_storage_bucket" "project-bucket" {
      + effective_labels            = (known after apply)
      + force_destroy               = true
      + id                          = (known after apply)
      + location                    = "US"
      + name                        = "data-engineering-zoomcamp-2024-project"
      + project                     = (known after apply)
      + public_access_prevention    = (known after apply)
      + self_link                   = (known after apply)
      + storage_class               = "STANDARD"
      + terraform_labels            = (known after apply)
      + uniform_bucket_level_access = (known after apply)
      + url                         = (known after apply)

      + lifecycle_rule {
          + action {
              + type = "AbortIncompleteMultipartUpload"
            }
          + condition {
              + age                   = 1
              + matches_prefix        = []
              + matches_storage_class = []
              + matches_suffix        = []
              + with_state            = (known after apply)
            }
        }
    }

Plan: 2 to add, 0 to change, 0 to destroy.
```
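
The plan maps back to a configuration along these lines (a reconstruction from the plan output above; the terraform/ files themselves are not shown in this view, so argument placement is approximate):

```
provider "google" {
  project = "axial-gist-411121"  # from the plan output; provider config assumed
}

resource "google_bigquery_dataset" "project_dataset" {
  dataset_id = "project_dataset"
  location   = "US"
}

resource "google_storage_bucket" "project-bucket" {
  name          = "data-engineering-zoomcamp-2024-project"
  location      = "US"
  storage_class = "STANDARD"
  force_destroy = true

  lifecycle_rule {
    action {
      type = "AbortIncompleteMultipartUpload"
    }
    condition {
      age = 1
    }
  }
}
```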

# make tf_apply output


```
Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
  + create

Terraform will perform the following actions:

  # google_bigquery_dataset.project_dataset will be created
  + resource "google_bigquery_dataset" "project_dataset" {
      + creation_time              = (known after apply)
      + dataset_id                 = "project_dataset"
      + default_collation          = (known after apply)
      + delete_contents_on_destroy = false
      + effective_labels           = (known after apply)
      + etag                       = (known after apply)
      + id                         = (known after apply)
      + is_case_insensitive        = (known after apply)
      + last_modified_time         = (known after apply)
      + location                   = "US"
      + max_time_travel_hours      = (known after apply)
      + project                    = "axial-gist-411121"
      + self_link                  = (known after apply)
      + storage_billing_model      = (known after apply)
      + terraform_labels           = (known after apply)
    }

  # google_storage_bucket.project-bucket will be created
  + resource "google_storage_bucket" "project-bucket" {
      + effective_labels            = (known after apply)
      + force_destroy               = true
      + id                          = (known after apply)
      + location                    = "US"
      + name                        = "data-engineering-zoomcamp-2024-project"
      + project                     = (known after apply)
      + public_access_prevention    = (known after apply)
      + self_link                   = (known after apply)
      + storage_class               = "STANDARD"
      + terraform_labels            = (known after apply)
      + uniform_bucket_level_access = (known after apply)
      + url                         = (known after apply)

      + lifecycle_rule {
          + action {
              + type = "AbortIncompleteMultipartUpload"
            }
          + condition {
              + age                   = 1
              + matches_prefix        = []
              + matches_storage_class = []
              + matches_suffix        = []
              + with_state            = (known after apply)
            }
        }
    }

Plan: 2 to add, 0 to change, 0 to destroy.

Do you want to perform these actions?
  Terraform will perform the actions described above.
  Only 'yes' will be accepted to approve.

  Enter a value: yes

google_bigquery_dataset.project_dataset: Creating...
google_storage_bucket.project-bucket: Creating...
google_bigquery_dataset.project_dataset: Creation complete after 1s [id=projects/axial-gist-411121/datasets/project_dataset]
google_storage_bucket.project-bucket: Creation complete after 1s [id=data-engineering-zoomcamp-2024-project]

Apply complete! Resources: 2 added, 0 changed, 0 destroyed.
```
4 changes: 4 additions & 0 deletions src/requirements.txt
@@ -0,0 +1,4 @@
requests==2.31.0
pandas==2.2.1
pyarrow==15.0.2
google-cloud-storage==2.16.0
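
The pinned packages outline the loader's pipeline: requests for the API call, pandas plus pyarrow for Parquet serialization, and google-cloud-storage for the upload. src/weather_data_loader.py itself is not shown in this view; a minimal sketch of that shape, assuming a hypothetical endpoint and reusing the bucket name from the Terraform output (the real script may differ):

```
# Hypothetical sketch of src/weather_data_loader.py -- the actual script is not
# shown in this commit view. Endpoint, params, and paths are assumptions.
import requests
import pandas as pd
from google.cloud import storage

BUCKET_NAME = "data-engineering-zoomcamp-2024-project"  # from the Terraform plan output

def fetch_weather() -> pd.DataFrame:
    # Example public endpoint; the real data source is unknown.
    resp = requests.get(
        "https://api.open-meteo.com/v1/forecast",
        params={"latitude": 52.52, "longitude": 13.41, "hourly": "temperature_2m"},
        timeout=30,
    )
    resp.raise_for_status()
    hourly = resp.json()["hourly"]  # parallel lists: time, temperature_2m
    return pd.DataFrame(hourly)

def upload_parquet(df: pd.DataFrame, blob_path: str) -> None:
    local_path = "/tmp/weather.parquet"
    df.to_parquet(local_path)  # pyarrow is the default Parquet engine
    # Uses the credentials provided by google-github-actions/auth in the workflow.
    client = storage.Client()
    client.bucket(BUCKET_NAME).blob(blob_path).upload_from_filename(local_path)

if __name__ == "__main__":
    upload_parquet(fetch_weather(), "raw/weather.parquet")
```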
