Skip to content

Commit c51d058

Browse files
authored
Added more stations and clean up (#2)
1 parent 1165cad commit c51d058

File tree

4 files changed

+69
-9
lines changed

4 files changed

+69
-9
lines changed

.github/workflows/fetch.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ name: Fetch Data
22

33
on:
44
workflow_dispatch:
5+
schedule:
6+
- cron: "0 * * * *"
57

68
defaults:
79
run:
@@ -29,7 +31,23 @@ jobs:
2931
credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}'
3032

3133

34+
- name: Install dependencies
35+
run: |
36+
pip install -r src/requirements.txt
37+
38+
3239
- name: execute
40+
env:
41+
SECRETS_VARS: ${{ secrets.GOOGLE_CREDENTIALS }}
3342
run: |
43+
echo "$SECRETS_VARS" > google_credential.json
44+
ls -l
3445
python src/weather_data_loader.py
3546
47+
48+
- name: Clean up
49+
if: always()
50+
run: |
51+
rm -f google_credential.json || true
52+
53+

src/requirements.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
11
requests==2.31.0
2-
pandas==2.2.1
3-
pyarrow==15.0.2
42
google-cloud-storage==2.16.0

src/station_list.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
ASHW1
2+
ENCW1
3+
FTAW1
4+
HZCW1
5+
KAWO
6+
KBFI
7+
KBLI
8+
KBVS
9+
KCLM
10+
KCLS
11+
KFHR
12+
KHQM
13+
KNUW
14+
KORS
15+
KPAE
16+
KPLU
17+
KPWT
18+
KRNT
19+
KSEA
20+
KSHN
21+
KTCM
22+
KTIW
23+
KUIL
24+
MBMW1
25+
QCNW1
26+
SUN64
27+
SWYW1
28+
TALPE
29+
THECK
30+
TR950
31+
TSR18
32+
TSTEV
33+
WAP55

src/weather_data_loader.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,16 @@
55

66
# station_ids = ['ASHW1', 'ENCW1', 'FTAW1']
77

8-
station_ids = ['ASHW1']
8+
# station_ids = ['ASHW1']
99

1010
class WeatherDataLoader:
1111

1212
storage_client: storage.Client=None
1313
bucket_name:str=None
1414
bucket:storage.Bucket=None
1515
credentials_file:str=None
16+
station_list_file:str
17+
station_ids:list=None
1618

1719

1820
def create_storage_client(self):
@@ -30,7 +32,7 @@ def upload_to_bucket(self, content: str, blob_name: str):
3032

3133
def tick(self):
3234

33-
for station_id in station_ids:
35+
for station_id in self.station_ids:
3436
# station specific url
3537
url = 'https://api.weather.gov/stations/' + station_id + '/observations/latest'
3638

@@ -49,7 +51,7 @@ def tick(self):
4951

5052
# based on the timestamp, determine the blob name
5153
blob_name = self.generate_blob_name(station_id, timestamp_string)
52-
print('blob name is', blob_name)
54+
print(f'station {station_id}, timestamp is {timestamp_string}, blob name is {blob_name}')
5355

5456
# convert from dict to string as storage client can process string without
5557
# us having to store it to local file then upload.
@@ -71,10 +73,10 @@ def generate_blob_name(self, station_id: str, timestamp_string: str):
7173
# construct blob name in GCS
7274
blob_name = 'raw/{year}/{month}/{day}/{hour}_{minute}_{station_id}.json'.format(
7375
year=timestamp.year,
74-
month=timestamp.month,
75-
day=timestamp.day,
76-
hour=timestamp.hour,
77-
minute=timestamp.minute,
76+
month=str(timestamp.month).zfill(2),
77+
day=str(timestamp.day).zfill(2),
78+
hour=str(timestamp.hour).zfill(2),
79+
minute=str(timestamp.minute).zfill(2),
7880
station_id=station_id
7981
)
8082

@@ -83,14 +85,23 @@ def generate_blob_name(self, station_id: str, timestamp_string: str):
8385

8486

8587
def init(self):
    """Populate the loader's runtime state.

    Sets, in this order, ``station_ids`` (from ``read_station_list``),
    ``storage_client`` (from ``create_storage_client``) and ``bucket``
    (from ``get_bucket``).
    """
    stations = self.read_station_list()
    self.station_ids = stations

    client = self.create_storage_client()
    self.storage_client = client

    # The bucket is looked up last, after the client has been assigned.
    self.bucket = self.get_bucket()
8891

8992

93+
def read_station_list(self):
    """Read station identifiers from ``self.station_list_file``.

    The file is read as UTF-8 text with one station identifier per line.
    Surrounding whitespace is stripped from every line and lines that are
    empty after stripping are skipped, so a blank line or a trailing
    newline never produces an empty identifier.

    Returns:
        list[str]: the station identifiers, in file order.

    Raises:
        OSError: if the station list file cannot be opened.
    """
    with open(self.station_list_file, encoding="utf-8") as file:
        stripped_lines = (line.strip() for line in file)
        # Drop blank lines: an empty id would yield a malformed request URL.
        return [station_id for station_id in stripped_lines if station_id]
96+
97+
98+
9099
def main():
    """Build a WeatherDataLoader, configure it, then run init() and tick()."""
    loader = WeatherDataLoader()

    # Input and output locations used by this run.
    loader.station_list_file = 'src/station_list.txt'
    loader.bucket_name = 'data-engineering-zoomcamp-2024-project'
    loader.credentials_file = './google_credential.json'

    loader.init()
    loader.tick()
96107

0 commit comments

Comments
 (0)