55
66# station_ids = ['ASHW1', 'ENCW1', 'FTAW1']
77
8- station_ids = ['ASHW1' ]
8+ # station_ids = ['ASHW1']
99
1010class WeatherDataLoader :
1111
1212 storage_client : storage .Client = None
1313 bucket_name :str = None
1414 bucket :storage .Bucket = None
1515 credentials_file :str = None
16+ station_list_file :str
17+ station_ids :list = None
1618
1719
1820 def create_storage_client (self ):
@@ -30,7 +32,7 @@ def upload_to_bucket(self, content: str, blob_name: str):
3032
3133 def tick (self ):
3234
33- for station_id in station_ids :
35+ for station_id in self . station_ids :
3436 # station specific url
3537 url = 'https://api.weather.gov/stations/' + station_id + '/observations/latest'
3638
@@ -49,7 +51,7 @@ def tick(self):
4951
5052 # based on the timestamp, determine the blob name
5153 blob_name = self .generate_blob_name (station_id , timestamp_string )
52- print (' blob name is' , blob_name )
54+ print (f'station { station_id } , timestamp is { timestamp_string } , blob name is { blob_name } ' )
5355
5456 # convert from dict to string as storage client can process string without
5557 # us having to store it to local file then upload.
@@ -71,10 +73,10 @@ def generate_blob_name(self, station_id: str, timestamp_string: str):
7173 # construct blob name in GCS
7274 blob_name = 'raw/{year}/{month}/{day}/{hour}_{minute}_{station_id}.json' .format (
7375 year = timestamp .year ,
74- month = timestamp .month ,
75- day = timestamp .day ,
76- hour = timestamp .hour ,
77- minute = timestamp .minute ,
76+ month = str ( timestamp .month ). zfill ( 2 ) ,
77+ day = str ( timestamp .day ). zfill ( 2 ) ,
78+ hour = str ( timestamp .hour ). zfill ( 2 ) ,
79+ minute = str ( timestamp .minute ). zfill ( 2 ) ,
7880 station_id = station_id
7981 )
8082
@@ -83,14 +85,23 @@ def generate_blob_name(self, station_id: str, timestamp_string: str):
8385
8486
8587 def init (self ):
88+ self .station_ids = self .read_station_list ()
8689 self .storage_client = self .create_storage_client ()
8790 self .bucket = self .get_bucket ()
8891
8992
93+ def read_station_list (self ):
94+ with open (self .station_list_file ) as file :
95+ return [line .rstrip () for line in file ]
96+
97+
98+
def main():
    """Configure a WeatherDataLoader and run one init/tick cycle."""
    weather_loader = WeatherDataLoader()

    # Settings are applied as plain attributes on the loader instance.
    settings = {
        'credentials_file': './google_credential.json',
        'bucket_name': 'data-engineering-zoomcamp-2024-project',
        'station_list_file': 'src/station_list.txt',
    }
    for attribute, value in settings.items():
        setattr(weather_loader, attribute, value)

    # init() must run before tick(): it loads the station ids tick() iterates.
    weather_loader.init()
    weather_loader.tick()
96107
0 commit comments