Adding parser for 1.4 model manifest (#236)

* Adding parser for 1.4 model manifest

Signed-off-by: Vishesh Tanksale <[email protected]>

* Updating the profile selection logic

Signed-off-by: Vishesh Tanksale <[email protected]>

* Updating the profile selection logic

Signed-off-by: Vishesh Tanksale <[email protected]>

* Updating description of new field

Signed-off-by: Vishesh Tanksale <[email protected]>

* Updating description of new field

Signed-off-by: Vishesh Tanksale <[email protected]>

---------

Signed-off-by: Vishesh Tanksale <[email protected]>
NVIDIA · Nov 21, 2024 · edb985e · edb985e
1 parent d3e8a73
commit edb985e
Show file tree

Hide file tree

Showing 12 changed files with 667 additions and 273 deletions.
diff --git a/api/apps/v1alpha1/nimcache_types.go b/api/apps/v1alpha1/nimcache_types.go
@@ -92,6 +92,8 @@ type ModelSpec struct {
 	GPUs []GPUSpec `json:"gpus,omitempty"`
 	// Lora indicates a finetuned model with LoRa adapters
 	Lora *bool `json:"lora,omitempty"`
+	// Buildable indicates generic model profiles that can be optimized with an NVIDIA engine for any GPUs
+	Buildable *bool `json:"buildable,omitempty"`
 }
 
 // GPUSpec is the spec required to cache models for selected gpu type

diff --git a/api/apps/v1alpha1/zz_generated.deepcopy.go b/api/apps/v1alpha1/zz_generated.deepcopy.go
diff --git a/bundle/manifests/apps.nvidia.com_nimcaches.yaml b/bundle/manifests/apps.nvidia.com_nimcaches.yaml
@@ -266,6 +266,11 @@ spec:
                       model:
                         description: Model spec for caching
                         properties:
+                          buildable:
+                            description: Buildable indicates generic model profiles
+                              that can be optimized with an NVIDIA engine for any
+                              GPUs
+                            type: boolean
                           engine:
                             description: Engine is the backend engine (tensort_llm,
                               vllm)

diff --git a/config/crd/bases/apps.nvidia.com_nimcaches.yaml b/config/crd/bases/apps.nvidia.com_nimcaches.yaml
@@ -266,6 +266,11 @@ spec:
                       model:
                         description: Model spec for caching
                         properties:
+                          buildable:
+                            description: Buildable indicates generic model profiles
+                              that can be optimized with an NVIDIA engine for any
+                              GPUs
+                            type: boolean
                           engine:
                             description: Engine is the backend engine (tensort_llm,
                               vllm)

diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml
@@ -266,6 +266,11 @@ spec:
                       model:
                         description: Model spec for caching
                         properties:
+                          buildable:
+                            description: Buildable indicates generic model profiles
+                              that can be optimized with an NVIDIA engine for any
+                              GPUs
+                            type: boolean
                           engine:
                             description: Engine is the backend engine (tensort_llm,
                               vllm)

diff --git a/internal/controller/nimcache_controller.go b/internal/controller/nimcache_controller.go
@@ -32,6 +32,7 @@ import (
 	platform "github.com/NVIDIA/k8s-nim-operator/internal/controller/platform"
 	"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
 	"github.com/NVIDIA/k8s-nim-operator/internal/nimparser"
+	nimparserutils "github.com/NVIDIA/k8s-nim-operator/internal/nimparser/utils"
 	"github.com/NVIDIA/k8s-nim-operator/internal/render"
 	"github.com/NVIDIA/k8s-nim-operator/internal/shared"
 	"github.com/NVIDIA/k8s-nim-operator/internal/utils"
@@ -621,16 +622,17 @@ func (r *NIMCacheReconciler) reconcileModelManifest(ctx context.Context, nimCach
 		return true, nil
 	}
 
+	parser := nimparserutils.GetNIMParser([]byte(output))
 	// Parse the file
-	manifest, err := nimparser.ParseModelManifestFromRawOutput([]byte(output))
+	manifest, err := parser.ParseModelManifestFromRawOutput([]byte(output))
 	if err != nil {
 		logger.Error(err, "Failed to parse model manifest from the pod")
 		return false, err
 	}
 	logger.V(2).Info("manifest file", "nimcache", nimCache.Name, "manifest", manifest)
 
 	// Create a ConfigMap with the model manifest file for re-use
-	err = r.createManifestConfigMap(ctx, nimCache, manifest)
+	err = r.createManifestConfigMap(ctx, nimCache, &manifest)
 	if err != nil {
 		logger.Error(err, "Failed to create model manifest config map")
 		return false, err
@@ -670,7 +672,7 @@ func (r *NIMCacheReconciler) reconcileModelSelection(ctx context.Context, nimCac
 		}
 
 		// Match profiles with user input
-		profiles, err := nimparser.MatchProfiles(nimCache.Spec.Source.NGC.Model, *nimManifest, discoveredGPUs)
+		profiles, err := nimManifest.MatchProfiles(nimCache.Spec.Source.NGC.Model, discoveredGPUs)
 		if err != nil {
 			logger.Error(err, "Failed to match profiles for given model parameters")
 			return err
@@ -758,17 +760,18 @@ func (r *NIMCacheReconciler) reconcileJobStatus(ctx context.Context, nimCache *a
 			logger.V(2).Info("model manifest config", "manifest", nimManifest)
 
 			// for selected profiles, update relevant info for status
-			for profileName, profileData := range *nimManifest {
+			for _, profileName := range nimManifest.GetProfilesList() {
 				for _, selectedProfile := range selectedProfiles {
 					if profileName == selectedProfile {
 						nimCache.Status.Profiles = append(nimCache.Status.Profiles, appsv1alpha1.NIMProfile{
 							Name:    profileName,
-							Model:   profileData.Model,
-							Config:  profileData.Tags,
-							Release: profileData.Release,
+							Model:   nimManifest.GetProfileModel(profileName),
+							Config:  nimManifest.GetProfileTags(profileName),
+							Release: nimManifest.GetProfileRelease(profileName),
 						})
 					}
 				}
+
 			}
 		}
 
@@ -1241,7 +1244,7 @@ func (r *NIMCacheReconciler) createCertVolumesAndMounts(ctx context.Context, nim
 }
 
 // extractNIMManifest extracts the NIMManifest from the ConfigMap data
-func (r *NIMCacheReconciler) extractNIMManifest(ctx context.Context, configName, namespace string) (*nimparser.NIMManifest, error) {
+func (r *NIMCacheReconciler) extractNIMManifest(ctx context.Context, configName, namespace string) (nimparser.NIMManifestInterface, error) {
 	configMap, err := r.getConfigMap(ctx, configName, namespace)
 	if err != nil {
 		return nil, fmt.Errorf("unable to get ConfigMap %s: %w", configName, err)
@@ -1252,15 +1255,16 @@ func (r *NIMCacheReconciler) extractNIMManifest(ctx context.Context, configName,
 		return nil, fmt.Errorf("model_manifest.yaml not found in ConfigMap")
 	}
 
-	manifest, err := nimparser.ParseModelManifestFromRawOutput([]byte(data))
+	parser := nimparserutils.GetNIMParser([]byte(data))
+	manifest, err := parser.ParseModelManifestFromRawOutput([]byte(data))
 	if err != nil {
 		return nil, fmt.Errorf("failed to unmarshal manifest data: %w", err)
 	}
 	return manifest, nil
 }
 
 // createManifestConfigMap creates a ConfigMap with the given model manifest data
-func (r *NIMCacheReconciler) createManifestConfigMap(ctx context.Context, nimCache *appsv1alpha1.NIMCache, manifestData *nimparser.NIMManifest) error {
+func (r *NIMCacheReconciler) createManifestConfigMap(ctx context.Context, nimCache *appsv1alpha1.NIMCache, manifestData *nimparser.NIMManifestInterface) error {
 	// Convert manifestData to YAML
 	manifestBytes, err := yaml.Marshal(manifestData)
 	if err != nil {

diff --git a/internal/controller/nimcache_controller_test.go b/internal/controller/nimcache_controller_test.go
@@ -40,7 +40,7 @@ import (
 
 	appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1"
 	"github.com/NVIDIA/k8s-nim-operator/internal/k8sutil"
-	"github.com/NVIDIA/k8s-nim-operator/internal/nimparser"
+	nimparserv1 "github.com/NVIDIA/k8s-nim-operator/internal/nimparser/v1"
 )
 
 var _ = Describe("NIMCache Controller", func() {
@@ -79,11 +79,11 @@ var _ = Describe("NIMCache Controller", func() {
 
 		// Create a model manifest configmap, as we cannot run a sample NIM container to extract for tests
 		filePath := filepath.Join("testdata", "manifest_trtllm.yaml")
+		nimparser := nimparserv1.NIMParser{}
 		manifestData, err := nimparser.ParseModelManifest(filePath)
 		Expect(err).NotTo(HaveOccurred())
-		Expect(*manifestData).To(HaveLen(2))
 
-		err = reconciler.createManifestConfigMap(context.TODO(), nimCache, manifestData)
+		err = reconciler.createManifestConfigMap(context.TODO(), nimCache, &manifestData)
 		Expect(err).NotTo(HaveOccurred())
 
 		// Verify that the ConfigMap was created
@@ -601,11 +601,12 @@ var _ = Describe("NIMCache Controller", func() {
 			}
 
 			filePath := filepath.Join("testdata", "manifest_trtllm.yaml")
+
+			nimparser := nimparserv1.NIMParser{}
 			manifestData, err := nimparser.ParseModelManifest(filePath)
 			Expect(err).NotTo(HaveOccurred())
-			Expect(*manifestData).To(HaveLen(2))
 
-			err = reconciler.createManifestConfigMap(ctx, nimCache, manifestData)
+			err = reconciler.createManifestConfigMap(ctx, nimCache, &manifestData)
 			Expect(err).NotTo(HaveOccurred())
 
 			// Verify that the ConfigMap was created
@@ -618,8 +619,9 @@ var _ = Describe("NIMCache Controller", func() {
 			extractedManifest, err := reconciler.extractNIMManifest(ctx, createdConfigMap.Name, createdConfigMap.Namespace)
 			Expect(err).NotTo(HaveOccurred())
 			Expect(extractedManifest).NotTo(BeNil())
-			Expect(*extractedManifest).To(HaveLen(2))
-			profile, exists := (*extractedManifest)["03fdb4d11f01be10c31b00e7c0540e2835e89a0079b483ad2dd3c25c8cc29b61"]
+			nimManifest := extractedManifest.(nimparserv1.NIMManifest)
+
+			profile, exists := (nimManifest)["03fdb4d11f01be10c31b00e7c0540e2835e89a0079b483ad2dd3c25c8cc29b61"]
 			Expect(exists).To(BeTrue())
 			Expect(profile.Model).To(Equal("meta/llama3-70b-instruct"))
 			Expect(profile.Tags["llm_engine"]).To(Equal("tensorrt_llm"))