Skip to content

Commit

Permalink
Added more stations and clean up (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
cch-k authored Mar 21, 2024
1 parent 1165cad commit c51d058
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 9 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/fetch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ name: Fetch Data

on:
workflow_dispatch:
schedule:
- cron: "0 * * * *"

defaults:
run:
Expand Down Expand Up @@ -29,7 +31,23 @@ jobs:
credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'


- name: Install dependencies
run: |
pip install -r src/requirements.txt
- name: execute
env:
SECRETS_VARS: ${{ secrets.GOOGLE_CREDENTIALS }}
run: |
echo "$SECRETS_VARS" > google_credential.json
ls -l
python src/weather_data_loader.py
- name: Clean up
if: always()
run: |
rm -f google_credential.json || true
2 changes: 0 additions & 2 deletions src/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
requests==2.31.0
pandas==2.2.1
pyarrow==15.0.2
google-cloud-storage==2.16.0
33 changes: 33 additions & 0 deletions src/station_list.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
ASHW1
ENCW1
FTAW1
HZCW1
KAWO
KBFI
KBLI
KBVS
KCLM
KCLS
KFHR
KHQM
KNUW
KORS
KPAE
KPLU
KPWT
KRNT
KSEA
KSHN
KTCM
KTIW
KUIL
MBMW1
QCNW1
SUN64
SWYW1
TALPE
THECK
TR950
TSR18
TSTEV
WAP55
25 changes: 18 additions & 7 deletions src/weather_data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@

# station_ids = ['ASHW1', 'ENCW1', 'FTAW1']

station_ids = ['ASHW1']
# station_ids = ['ASHW1']

class WeatherDataLoader:

storage_client: storage.Client=None
bucket_name:str=None
bucket:storage.Bucket=None
credentials_file:str=None
station_list_file:str
station_ids:list=None


def create_storage_client(self):
Expand All @@ -30,7 +32,7 @@ def upload_to_bucket(self, content: str, blob_name: str):

def tick(self):

for station_id in station_ids:
for station_id in self.station_ids:
# station specific url
url = 'https://api.weather.gov/stations/' + station_id + '/observations/latest'

Expand All @@ -49,7 +51,7 @@ def tick(self):

# based on the timestamp, determine the blob name
blob_name = self.generate_blob_name(station_id, timestamp_string)
print('blob name is', blob_name)
print(f'station {station_id}, timestamp is {timestamp_string}, blob name is {blob_name}')

# convert from dict to string as storage client can process string without
# us having to store it to local file then upload.
Expand All @@ -71,10 +73,10 @@ def generate_blob_name(self, station_id: str, timestamp_string: str):
# construct blob name in GCS
blob_name = 'raw/{year}/{month}/{day}/{hour}_{minute}_{station_id}.json'.format(
year=timestamp.year,
month=timestamp.month,
day=timestamp.day,
hour=timestamp.hour,
minute=timestamp.minute,
month=str(timestamp.month).zfill(2),
day=str(timestamp.day).zfill(2),
hour=str(timestamp.hour).zfill(2),
minute=str(timestamp.minute).zfill(2),
station_id=station_id
)

Expand All @@ -83,14 +85,23 @@ def generate_blob_name(self, station_id: str, timestamp_string: str):


def init(self):
    """Populate the loader's runtime state before ``tick`` is called.

    Reads the station list, then creates the storage client, then
    resolves the bucket, storing each result on the instance.
    """
    stations = self.read_station_list()
    self.station_ids = stations

    client = self.create_storage_client()
    # Stored before get_bucket() is invoked, same order as before;
    # presumably get_bucket() reads self.storage_client -- TODO confirm.
    self.storage_client = client

    resolved_bucket = self.get_bucket()
    self.bucket = resolved_bucket


def read_station_list(self):
    """Load the station identifiers listed in ``self.station_list_file``.

    The file holds one station ID per line. Surrounding whitespace is
    stripped and blank lines are skipped, so a trailing newline or an
    empty separator line never produces an empty station ID (``tick``
    concatenates each ID straight into the request URL).

    Returns:
        list[str]: station IDs in file order; empty if the file has none.

    Raises:
        OSError: if the station list file cannot be opened.
    """
    with open(self.station_list_file, encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)
        # Drop entries that were blank or whitespace-only in the file.
        return [station_id for station_id in stripped_lines if station_id]



def main():
    """Build a WeatherDataLoader with the project settings, then run it.

    Sets the credentials file, bucket name and station list path on a
    fresh loader, calls ``init()`` and finally ``tick()``.
    """
    settings = {
        'credentials_file': './google_credential.json',
        'bucket_name': 'data-engineering-zoomcamp-2024-project',
        'station_list_file': 'src/station_list.txt',
    }

    weather_loader = WeatherDataLoader()
    # dict preserves insertion order, so attributes are assigned in the
    # same sequence as the original explicit assignments.
    for attribute, value in settings.items():
        setattr(weather_loader, attribute, value)

    weather_loader.init()
    weather_loader.tick()

Expand Down

0 comments on commit c51d058

Please sign in to comment.