@@ -251,29 +251,36 @@ def get_all_regions_instance_types_df(regions: Set[str]) -> pd.DataFrame:
251
251
252
252
253
253
# Fetch Images
254
- _GPU_TO_IMAGE_DATE = {
255
- # https://console.aws.amazon.com/ec2/v2/home?region=us-east-1#Images:visibility=public-images;v=3;search=:64,:Ubuntu%2020,:Deep%20Learning%20AMI%20GPU%20PyTorch # pylint: disable=line-too-long
256
- # Current AMIs:
257
- # Deep Learning AMI GPU PyTorch 1.10.0 (Ubuntu 20.04) 20220308
258
- # Nvidia driver: 510.47.03, CUDA Version: 11.6 (does not support torch==1.13.0+cu117)
259
- #
260
- # Use a list to fallback to newer AMI, as some regions like ap-southeast-3 does not have
261
- # the older AMI.
262
- 'gpu' : ['20220308' , '20221101' ],
263
- # Deep Learning AMI GPU PyTorch 1.10.0 (Ubuntu 20.04) 20211208
264
- # Downgrade the AMI for K80 due as it is only compatible with
265
- # NVIDIA driver lower than 470.
266
- 'k80' : ['20211208' ]
267
- }
268
- _UBUNTU_VERSION = ['18.04' , '20.04' ]
269
-
270
-
271
- def _fetch_image_id (region : str , ubuntu_version : str ,
272
- creation_date : str ) -> Optional [str ]:
254
# https://console.aws.amazon.com/ec2/v2/home?region=us-east-1#Images:visibility=public-images;v=3;search=:64,:Ubuntu%2020,:Deep%20Learning%20AMI%20GPU%20PyTorch # pylint: disable=line-too-long
# Current AMIs (we have to use different PyTorch versions for different OS as
# Ubuntu 18.04 does not have the latest PyTorch version):
#   GPU:
#     Deep Learning AMI GPU PyTorch 1.13.1 (Ubuntu 20.04) 20230103
#       Nvidia driver: 515.65.01, CUDA Version: 11.7
#
#     Deep Learning AMI GPU PyTorch 1.10.0 (Ubuntu 18.04) 20221114
#       Nvidia driver: 510.47.03, CUDA Version: 11.6
#
#   K80:
#     Deep Learning AMI GPU PyTorch 1.10.0 (Ubuntu 20.04) 20211208
#       Nvidia driver: 470.57.02, CUDA Version: 11.4
#
#     Deep Learning AMI GPU PyTorch 1.10.0 (Ubuntu 18.04) 20211208
#       Nvidia driver: 470.57.02, CUDA Version: 11.4
#
# One entry per AMI to look up in every region. Field order matters, because
# consumers unpack each entry positionally:
#   (gpu key, Ubuntu version, AMI creation date, PyTorch version)
# The last three fields are substituted into the AMI name that is searched
# for; the gpu key and Ubuntu version also make up the image tag.
_GPU_UBUNTU_DATE_PYTORCH = [
    ('gpu', '20.04', '20230103', '1.13.1'),
    ('gpu', '18.04', '20221114', '1.10.0'),
    ('k80', '20.04', '20211208', '1.10.0'),
    ('k80', '18.04', '20211208', '1.10.0'),
]
276
+
277
+
278
+ def _fetch_image_id (region : str , ubuntu_version : str , creation_date : str ,
279
+ pytorch_version : str ) -> Optional [str ]:
273
280
try :
274
281
image = subprocess .check_output (f"""\
275
282
aws ec2 describe-images --region { region } --owners amazon \\
276
- --filters 'Name=name,Values="Deep Learning AMI GPU PyTorch 1.10.0 (Ubuntu { ubuntu_version } ) { creation_date } "' \\
283
+ --filters 'Name=name,Values="Deep Learning AMI GPU PyTorch { pytorch_version } (Ubuntu { ubuntu_version } ) { creation_date } "' \\
277
284
'Name=state,Values=available' --query 'Images[:1].ImageId' --output text
278
285
""" ,
279
286
shell = True )
@@ -290,33 +297,25 @@ def _fetch_image_id(region: str, ubuntu_version: str,
290
297
291
298
@ray.remote
def _get_image_row(
        region: str, gpu: str, ubuntu_version: str, date: str,
        pytorch_version: str
) -> Tuple[str, str, str, str, Optional[str], str]:
    """Look up a single AMI in one region and return it as a catalog row.

    Args:
        region: AWS region to search.
        gpu: Image family key (the entries of _GPU_UBUNTU_DATE_PYTORCH use
            'gpu' and 'k80'); it becomes part of the returned tag.
        ubuntu_version: Ubuntu release of the AMI, e.g. '20.04'.
        date: Creation-date suffix of the AMI name; returned unchanged as
            the last element of the row.
        pytorch_version: PyTorch version that appears in the AMI name.

    Returns:
        The tuple (tag, region, 'ubuntu', ubuntu_version, image_id, date),
        where tag is 'skypilot:<gpu>-ubuntu-<version without dots>' and
        image_id is exactly what _fetch_image_id returned. A row is returned
        even when no image was found; the miss is only logged.
    """
    print(f'Getting image for {region}, {ubuntu_version}, {gpu}')
    image_id = _fetch_image_id(region, ubuntu_version, date, pytorch_version)
    # Truthiness instead of `is None`, so that an empty id is reported as a
    # miss as well. NOTE(review): presumably the AWS CLI text output is an
    # empty string when nothing matches - confirm against _fetch_image_id.
    if not image_id:
        # not found
        print(f'Failed to find image for {region}, {ubuntu_version}, {gpu}')
    tag = f'skypilot:{gpu}-ubuntu-{ubuntu_version.replace(".", "")}'
    return tag, region, 'ubuntu', ubuntu_version, image_id, date
311
309
312
310
313
311
def get_all_regions_images_df (regions : Set [str ]) -> pd .DataFrame :
314
312
workers = []
315
- for cpu_or_gpu in _GPU_TO_IMAGE_DATE :
316
- for ubuntu_version in _UBUNTU_VERSION :
317
- for region in regions :
318
- workers .append (
319
- _get_image_row .remote (region , ubuntu_version , cpu_or_gpu ))
313
+ for (gpu , ubuntu_version , date ,
314
+ pytorch_version ) in _GPU_UBUNTU_DATE_PYTORCH :
315
+ for region in regions :
316
+ workers .append (
317
+ _get_image_row .remote (region , gpu , ubuntu_version , date ,
318
+ pytorch_version ))
320
319
321
320
results = ray .get (workers )
322
321
results = pd .DataFrame (
0 commit comments