Add unit tests for PyTorchJob as Trial

andreyvelich · andreyvelich · commit e04213f3cd32 · 2025-02-12T16:22:28.000Z
Signed-off-by: Andrey Velichkevich <andrey.velichkevich@gmail.com>
diff --git a/sdk/python/v1beta1/kubeflow/katib/api/katib_client_test.py b/sdk/python/v1beta1/kubeflow/katib/api/katib_client_test.py
@@ -18,12 +18,14 @@
     V1beta1TrialTemplate,
 )
 from kubeflow.katib.constants import constants
+from kubeflow.katib.types import types
 from kubeflow.storage_initializer.hugging_face import (
     HuggingFaceDatasetParams,
     HuggingFaceModelParams,
     HuggingFaceTrainerParams,
 )
-from kubernetes.client import V1ObjectMeta
+from kubeflow.training.models import KubeflowOrgV1PyTorchJob
+from kubernetes.client import V1Job, V1ObjectMeta
 
 PVC_FAILED = "pvc creation failed"
 
@@ -476,16 +478,37 @@ def create_experiment(
                     learning_rate=katib.search.double(min=1e-05, max=5e-05),
                 ),
             ),
+            "resources_per_trial": types.TrainerResources(
+                num_workers=2,
+                num_procs_per_worker=2,
+                resources_per_worker={"gpu": "2"},
+            ),
         },
         RuntimeError,
     ),
     (
-        "valid flow with custom objective tuning",
+        "valid flow with custom objective function and Job as Trial",
         {
             "name": "tune_test",
             "objective": lambda x: print(f"a={x}"),
             "parameters": {"a": katib.search.int(min=10, max=100)},
             "objective_metric_name": "a",
+            "resources_per_trial": {"gpu": "2"},
+        },
+        TEST_RESULT_SUCCESS,
+    ),
+    (
+        "valid flow with custom objective function and PyTorchJob as Trial",
+        {
+            "name": "tune_test",
+            "objective": lambda x: print(f"a={x}"),
+            "parameters": {"a": katib.search.int(min=10, max=100)},
+            "objective_metric_name": "a",
+            "resources_per_trial": types.TrainerResources(
+                num_workers=2,
+                num_procs_per_worker=2,
+                resources_per_worker={"gpu": "2"},
+            ),
         },
         TEST_RESULT_SUCCESS,
     ),
@@ -508,6 +531,11 @@ def create_experiment(
                     learning_rate=katib.search.double(min=1e-05, max=5e-05),
                 ),
             ),
+            "resources_per_trial": types.TrainerResources(
+                num_workers=2,
+                num_procs_per_worker=2,
+                resources_per_worker={"gpu": "2"},
+            ),
             "objective_metric_name": "train_loss",
             "objective_type": "minimize",
         },
@@ -597,7 +625,10 @@ def test_tune(katib_client, test_name, kwargs, expected_output):
                 call_args = mock_create_experiment.call_args
                 experiment = call_args[0][0]
 
-                if test_name == "valid flow with custom objective tuning":
+                if (
+                    test_name
+                    == "valid flow with custom objective function and Job as Trial"
+                ):
                     # Verify input_params
                     args_content = "".join(
                         experiment.spec.trial_template.trial_spec.spec.template.spec.containers[
@@ -623,6 +654,18 @@ def test_tune(katib_client, test_name, kwargs, expected_output):
                         objective_metric_name="a",
                         additional_metric_names=[],
                     )
+                    # Verify Trial spec
+                    assert isinstance(experiment.spec.trial_template.trial_spec, V1Job)
+
+                elif (
+                    test_name
+                    == "valid flow with custom objective function and PyTorchJob as Trial"
+                ):
+                    # Verify Trial spec
+                    assert isinstance(
+                        experiment.spec.trial_template.trial_spec,
+                        KubeflowOrgV1PyTorchJob,
+                    )
 
                 elif test_name == "valid flow with external model tuning":
                     # Verify input_params
diff --git a/sdk/python/v1beta1/kubeflow/katib/types/types.py b/sdk/python/v1beta1/kubeflow/katib/types/types.py
@@ -1,10 +1,10 @@
 from dataclasses import dataclass
 from typing import Dict
 
+
 # Trainer resources for distributed training.
 @dataclass
 class TrainerResources:
     num_workers: int
     num_procs_per_worker: int
     resources_per_worker: Dict[str, str]
-