|
89 | 89 | "source": [
|
90 | 90 | "# Create our cluster and submit\n",
|
91 | 91 | "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n",
|
92 |
| - "cluster = Cluster(ClusterConfiguration(name='hfgputest', \n", |
| 92 | + "cluster_name= \"hfgputest\"\n", |
| 93 | + "cluster = Cluster(ClusterConfiguration(name=cluster_name, \n", |
93 | 94 | " head_gpus=1, # For GPU enabled workloads set the head_gpus and num_gpus\n",
|
94 | 95 | " num_gpus=1,\n",
|
95 | 96 | " num_workers=1,\n",
|
|
287 | 288 | "ray_cluster_uri = cluster.cluster_uri()"
|
288 | 289 | ]
|
289 | 290 | },
|
| 291 | + { |
| 292 | + "cell_type": "markdown", |
| 293 | + "id": "64d65c3c", |
| 294 | + "metadata": {}, |
| 295 | + "source": [ |
| 296 | + "Now we can connect directly to our Ray cluster via the Ray Python client:" |
| 297 | + ] |
| 298 | + }, |
| 299 | + { |
| 300 | + "cell_type": "code", |
| 301 | + "execution_count": null, |
| 302 | + "id": "60276d86", |
| 303 | + "metadata": {}, |
| 304 | + "outputs": [], |
| 305 | + "source": [ |
| 306 | + "from codeflare_sdk import generate_cert\n", |
| 307 | + "# Create required TLS cert and export the environment variables to enable TLS\n", |
| 308 | + "generate_cert.generate_tls_cert(cluster_name, cluster.config.namespace)\n", |
| 309 | + "generate_cert.export_env(cluster_name, cluster.config.namespace)" |
| 310 | + ] |
| 311 | + }, |
290 | 312 | {
|
291 | 313 | "cell_type": "markdown",
|
292 | 314 | "id": "44dba6a0-8275-4726-8911-6b6ec467b6a3",
|
|
432 | 454 | "\n",
|
433 | 455 | " ray_trainer = TorchTrainer(\n",
|
434 | 456 | " train_func,\n",
|
435 |
| - " scaling_config=ScalingConfig(num_workers=3, use_gpu=True),\n", |
| 457 | + " scaling_config=ScalingConfig(num_workers=2, use_gpu=True),\n", |
436 | 458 | " # Configure persistent storage that is accessible across \n",
|
437 | 459 | " # all worker nodes.\n",
|
438 | 460 | " # Uncomment and update the RunConfig below to include your storage details.\n",
|
|
0 commit comments