5
5
6
6
# station_ids = ['ASHW1', 'ENCW1', 'FTAW1']
7
7
8
- station_ids = ['ASHW1' ]
8
+ # station_ids = ['ASHW1']
9
9
10
10
class WeatherDataLoader :
11
11
12
12
storage_client : storage .Client = None
13
13
bucket_name :str = None
14
14
bucket :storage .Bucket = None
15
15
credentials_file :str = None
16
+ station_list_file :str
17
+ station_ids :list = None
16
18
17
19
18
20
def create_storage_client (self ):
@@ -30,7 +32,7 @@ def upload_to_bucket(self, content: str, blob_name: str):
30
32
31
33
def tick (self ):
32
34
33
- for station_id in station_ids :
35
+ for station_id in self . station_ids :
34
36
# station specific url
35
37
url = 'https://api.weather.gov/stations/' + station_id + '/observations/latest'
36
38
@@ -49,7 +51,7 @@ def tick(self):
49
51
50
52
# based on the timestamp, determine the blob name
51
53
blob_name = self .generate_blob_name (station_id , timestamp_string )
52
- print (' blob name is' , blob_name )
54
+ print (f'station { station_id } , timestamp is { timestamp_string } , blob name is { blob_name } ' )
53
55
54
56
# convert from dict to string as storage client can process string without
55
57
# us having to store it to local file then upload.
@@ -71,10 +73,10 @@ def generate_blob_name(self, station_id: str, timestamp_string: str):
71
73
# construct blob name in GCS
72
74
blob_name = 'raw/{year}/{month}/{day}/{hour}_{minute}_{station_id}.json' .format (
73
75
year = timestamp .year ,
74
- month = timestamp .month ,
75
- day = timestamp .day ,
76
- hour = timestamp .hour ,
77
- minute = timestamp .minute ,
76
+ month = str ( timestamp .month ). zfill ( 2 ) ,
77
+ day = str ( timestamp .day ). zfill ( 2 ) ,
78
+ hour = str ( timestamp .hour ). zfill ( 2 ) ,
79
+ minute = str ( timestamp .minute ). zfill ( 2 ) ,
78
80
station_id = station_id
79
81
)
80
82
@@ -83,14 +85,23 @@ def generate_blob_name(self, station_id: str, timestamp_string: str):
83
85
84
86
85
87
def init(self):
    """Prepare the loader for use.

    Populates ``station_ids`` from ``read_station_list()``, then
    ``storage_client`` from ``create_storage_client()`` and ``bucket``
    from ``get_bucket()``, in that order.
    """
    # Station ids come from the file named by ``station_list_file``.
    loaded_station_ids = self.read_station_list()
    self.station_ids = loaded_station_ids

    # NOTE(review): get_bucket() presumably relies on the client assigned
    # here, so the client is stored before the bucket is resolved.
    client = self.create_storage_client()
    self.storage_client = client

    resolved_bucket = self.get_bucket()
    self.bucket = resolved_bucket
88
91
89
92
93
def read_station_list(self):
    """Read station identifiers from ``self.station_list_file``.

    Each non-blank line of the file is treated as one station id.
    Surrounding whitespace is stripped and blank lines are skipped, so a
    trailing newline or an empty separator line never produces an empty id
    (``tick`` concatenates every id directly into the request URL).

    Returns:
        list[str]: the station ids, in file order.

    Raises:
        OSError: if the station list file cannot be opened.
    """
    # Explicit encoding keeps the result independent of the platform locale.
    with open(self.station_list_file, encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)
        return [station_id for station_id in stripped_lines if station_id]
96
+
97
+
98
+
90
99
def main():
    """Configure a ``WeatherDataLoader`` and run a single ``tick``.

    Sets the credentials file, bucket name and station list path on a new
    loader, calls ``init()`` and then ``tick()`` once.
    """
    weather_loader = WeatherDataLoader()

    # Configuration must be in place before init(), which reads the station
    # list file.
    weather_loader.credentials_file = './google_credential.json'
    weather_loader.bucket_name = 'data-engineering-zoomcamp-2024-project'
    weather_loader.station_list_file = 'src/station_list.txt'

    weather_loader.init()
    weather_loader.tick()
96
107
0 commit comments