diff --git a/api/apps/v1alpha1/nimcache_types.go b/api/apps/v1alpha1/nimcache_types.go index 5231d103..cb6b889a 100644 --- a/api/apps/v1alpha1/nimcache_types.go +++ b/api/apps/v1alpha1/nimcache_types.go @@ -92,6 +92,8 @@ type ModelSpec struct { GPUs []GPUSpec `json:"gpus,omitempty"` // Lora indicates a finetuned model with LoRa adapters Lora *bool `json:"lora,omitempty"` + // Buildable indicates generic model profiles that can be optimized with an NVIDIA engine for any GPUs + Buildable *bool `json:"buildable,omitempty"` } // GPUSpec is the spec required to cache models for selected gpu type diff --git a/api/apps/v1alpha1/zz_generated.deepcopy.go b/api/apps/v1alpha1/zz_generated.deepcopy.go index fb05a898..1a8386e5 100644 --- a/api/apps/v1alpha1/zz_generated.deepcopy.go +++ b/api/apps/v1alpha1/zz_generated.deepcopy.go @@ -313,6 +313,11 @@ func (in *ModelSpec) DeepCopyInto(out *ModelSpec) { *out = new(bool) **out = **in } + if in.Buildable != nil { + in, out := &in.Buildable, &out.Buildable + *out = new(bool) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelSpec. 
diff --git a/bundle/manifests/apps.nvidia.com_nimcaches.yaml b/bundle/manifests/apps.nvidia.com_nimcaches.yaml index 8b3cab97..b61500ef 100644 --- a/bundle/manifests/apps.nvidia.com_nimcaches.yaml +++ b/bundle/manifests/apps.nvidia.com_nimcaches.yaml @@ -266,6 +266,11 @@ spec: model: description: Model spec for caching properties: + buildable: + description: Buildable indicates generic model profiles + that can be optimized with an NVIDIA engine for any + GPUs + type: boolean engine: description: Engine is the backend engine (tensort_llm, vllm) diff --git a/config/crd/bases/apps.nvidia.com_nimcaches.yaml b/config/crd/bases/apps.nvidia.com_nimcaches.yaml index 8b3cab97..b61500ef 100644 --- a/config/crd/bases/apps.nvidia.com_nimcaches.yaml +++ b/config/crd/bases/apps.nvidia.com_nimcaches.yaml @@ -266,6 +266,11 @@ spec: model: description: Model spec for caching properties: + buildable: + description: Buildable indicates generic model profiles + that can be optimized with an NVIDIA engine for any + GPUs + type: boolean engine: description: Engine is the backend engine (tensort_llm, vllm) diff --git a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml index 8b3cab97..b61500ef 100644 --- a/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml +++ b/deployments/helm/k8s-nim-operator/crds/apps.nvidia.com_nimcaches.yaml @@ -266,6 +266,11 @@ spec: model: description: Model spec for caching properties: + buildable: + description: Buildable indicates generic model profiles + that can be optimized with an NVIDIA engine for any + GPUs + type: boolean engine: description: Engine is the backend engine (tensort_llm, vllm) diff --git a/internal/controller/nimcache_controller.go b/internal/controller/nimcache_controller.go index 482cd7b1..5b254693 100644 --- a/internal/controller/nimcache_controller.go +++ b/internal/controller/nimcache_controller.go @@ -32,6 +32,7 @@ import ( 
platform "github.com/NVIDIA/k8s-nim-operator/internal/controller/platform" "github.com/NVIDIA/k8s-nim-operator/internal/k8sutil" "github.com/NVIDIA/k8s-nim-operator/internal/nimparser" + nimparserutils "github.com/NVIDIA/k8s-nim-operator/internal/nimparser/utils" "github.com/NVIDIA/k8s-nim-operator/internal/render" "github.com/NVIDIA/k8s-nim-operator/internal/shared" "github.com/NVIDIA/k8s-nim-operator/internal/utils" @@ -621,8 +622,9 @@ func (r *NIMCacheReconciler) reconcileModelManifest(ctx context.Context, nimCach return true, nil } + parser := nimparserutils.GetNIMParser([]byte(output)) // Parse the file - manifest, err := nimparser.ParseModelManifestFromRawOutput([]byte(output)) + manifest, err := parser.ParseModelManifestFromRawOutput([]byte(output)) if err != nil { logger.Error(err, "Failed to parse model manifest from the pod") return false, err @@ -630,7 +632,7 @@ func (r *NIMCacheReconciler) reconcileModelManifest(ctx context.Context, nimCach logger.V(2).Info("manifest file", "nimcache", nimCache.Name, "manifest", manifest) // Create a ConfigMap with the model manifest file for re-use - err = r.createManifestConfigMap(ctx, nimCache, manifest) + err = r.createManifestConfigMap(ctx, nimCache, &manifest) if err != nil { logger.Error(err, "Failed to create model manifest config map") return false, err @@ -670,7 +672,7 @@ func (r *NIMCacheReconciler) reconcileModelSelection(ctx context.Context, nimCac } // Match profiles with user input - profiles, err := nimparser.MatchProfiles(nimCache.Spec.Source.NGC.Model, *nimManifest, discoveredGPUs) + profiles, err := nimManifest.MatchProfiles(nimCache.Spec.Source.NGC.Model, discoveredGPUs) if err != nil { logger.Error(err, "Failed to match profiles for given model parameters") return err @@ -758,17 +760,18 @@ func (r *NIMCacheReconciler) reconcileJobStatus(ctx context.Context, nimCache *a logger.V(2).Info("model manifest config", "manifest", nimManifest) // for selected profiles, update relevant info for status - for 
profileName, profileData := range *nimManifest { + for _, profileName := range nimManifest.GetProfilesList() { for _, selectedProfile := range selectedProfiles { if profileName == selectedProfile { nimCache.Status.Profiles = append(nimCache.Status.Profiles, appsv1alpha1.NIMProfile{ Name: profileName, - Model: profileData.Model, - Config: profileData.Tags, - Release: profileData.Release, + Model: nimManifest.GetProfileModel(profileName), + Config: nimManifest.GetProfileTags(profileName), + Release: nimManifest.GetProfileRelease(profileName), }) } } + } } @@ -1241,7 +1244,7 @@ func (r *NIMCacheReconciler) createCertVolumesAndMounts(ctx context.Context, nim } // extractNIMManifest extracts the NIMManifest from the ConfigMap data -func (r *NIMCacheReconciler) extractNIMManifest(ctx context.Context, configName, namespace string) (*nimparser.NIMManifest, error) { +func (r *NIMCacheReconciler) extractNIMManifest(ctx context.Context, configName, namespace string) (nimparser.NIMManifestInterface, error) { configMap, err := r.getConfigMap(ctx, configName, namespace) if err != nil { return nil, fmt.Errorf("unable to get ConfigMap %s: %w", configName, err) @@ -1252,7 +1255,8 @@ func (r *NIMCacheReconciler) extractNIMManifest(ctx context.Context, configName, return nil, fmt.Errorf("model_manifest.yaml not found in ConfigMap") } - manifest, err := nimparser.ParseModelManifestFromRawOutput([]byte(data)) + parser := nimparserutils.GetNIMParser([]byte(data)) + manifest, err := parser.ParseModelManifestFromRawOutput([]byte(data)) if err != nil { return nil, fmt.Errorf("failed to unmarshal manifest data: %w", err) } @@ -1260,7 +1264,7 @@ func (r *NIMCacheReconciler) extractNIMManifest(ctx context.Context, configName, } // createManifestConfigMap creates a ConfigMap with the given model manifest data -func (r *NIMCacheReconciler) createManifestConfigMap(ctx context.Context, nimCache *appsv1alpha1.NIMCache, manifestData *nimparser.NIMManifest) error { +func (r *NIMCacheReconciler) 
createManifestConfigMap(ctx context.Context, nimCache *appsv1alpha1.NIMCache, manifestData *nimparser.NIMManifestInterface) error { // Convert manifestData to YAML manifestBytes, err := yaml.Marshal(manifestData) if err != nil { diff --git a/internal/controller/nimcache_controller_test.go b/internal/controller/nimcache_controller_test.go index 35b50c19..6156dde7 100644 --- a/internal/controller/nimcache_controller_test.go +++ b/internal/controller/nimcache_controller_test.go @@ -40,7 +40,7 @@ import ( appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1" "github.com/NVIDIA/k8s-nim-operator/internal/k8sutil" - "github.com/NVIDIA/k8s-nim-operator/internal/nimparser" + nimparserv1 "github.com/NVIDIA/k8s-nim-operator/internal/nimparser/v1" ) var _ = Describe("NIMCache Controller", func() { @@ -79,11 +79,11 @@ var _ = Describe("NIMCache Controller", func() { // Create a model manifest configmap, as we cannot run a sample NIM container to extract for tests filePath := filepath.Join("testdata", "manifest_trtllm.yaml") + nimparser := nimparserv1.NIMParser{} manifestData, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - Expect(*manifestData).To(HaveLen(2)) - err = reconciler.createManifestConfigMap(context.TODO(), nimCache, manifestData) + err = reconciler.createManifestConfigMap(context.TODO(), nimCache, &manifestData) Expect(err).NotTo(HaveOccurred()) // Verify that the ConfigMap was created @@ -601,11 +601,12 @@ var _ = Describe("NIMCache Controller", func() { } filePath := filepath.Join("testdata", "manifest_trtllm.yaml") + + nimparser := nimparserv1.NIMParser{} manifestData, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - Expect(*manifestData).To(HaveLen(2)) - err = reconciler.createManifestConfigMap(ctx, nimCache, manifestData) + err = reconciler.createManifestConfigMap(ctx, nimCache, &manifestData) Expect(err).NotTo(HaveOccurred()) // Verify that the ConfigMap was created @@ -618,8 
+619,9 @@ var _ = Describe("NIMCache Controller", func() { extractedManifest, err := reconciler.extractNIMManifest(ctx, createdConfigMap.Name, createdConfigMap.Namespace) Expect(err).NotTo(HaveOccurred()) Expect(extractedManifest).NotTo(BeNil()) - Expect(*extractedManifest).To(HaveLen(2)) - profile, exists := (*extractedManifest)["03fdb4d11f01be10c31b00e7c0540e2835e89a0079b483ad2dd3c25c8cc29b61"] + nimManifest := extractedManifest.(nimparserv1.NIMManifest) + + profile, exists := (nimManifest)["03fdb4d11f01be10c31b00e7c0540e2835e89a0079b483ad2dd3c25c8cc29b61"] Expect(exists).To(BeTrue()) Expect(profile.Model).To(Equal("meta/llama3-70b-instruct")) Expect(profile.Tags["llm_engine"]).To(Equal("tensorrt_llm")) diff --git a/internal/nimparser/nimparser.go b/internal/nimparser/nimparser.go index 127d8ee9..69102965 100644 --- a/internal/nimparser/nimparser.go +++ b/internal/nimparser/nimparser.go @@ -17,13 +17,7 @@ limitations under the License. package nimparser import ( - "os" - "regexp" - "strconv" - "strings" - appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1" - "gopkg.in/yaml.v2" ) const ( @@ -31,226 +25,19 @@ const ( BackendTypeTensorRT = "tensorrt" ) -// File represents the model files -type File struct { - Name string `yaml:"name" json:"name,omitempty"` -} - -// Src represents model source -type Src struct { - RepoID string `yaml:"repo_id" json:"repo_id,omitempty"` - Files []File `yaml:"files" json:"files,omitempty"` -} - -// Component represents source and destination for model files -type Component struct { - Dst string `yaml:"dst" json:"dst,omitempty"` - Src Src `yaml:"src" json:"src,omitempty"` -} - -// Workspace represents workspace for model components -type Workspace struct { - Components []Component `yaml:"components" json:"components,omitempty"` -} - -// NIMProfile is the model profile supported by the NIM container -type NIMProfile struct { - Model string `yaml:"model" json:"model,omitempty"` - Release string `yaml:"release" 
json:"release,omitempty"` - Tags map[string]string `yaml:"tags" json:"tags,omitempty"` - ContainerURL string `yaml:"container_url" json:"container_url,omitempty"` - Workspace Workspace `yaml:"workspace" json:"workspace,omitempty"` -} - -// NIMManifest is the model manifest file -type NIMManifest map[string]NIMProfile - -// UnmarshalYAML is the custom unmarshal function for Src -func (s *Src) UnmarshalYAML(unmarshal func(interface{}) error) error { - var raw map[string]interface{} - if err := unmarshal(&raw); err != nil { - return err - } - - if repoID, ok := raw["repo_id"].(string); ok { - s.RepoID = repoID - } - - if files, ok := raw["files"].([]interface{}); ok { - for _, file := range files { - if fileStr, ok := file.(string); ok { - s.Files = append(s.Files, File{Name: fileStr}) - } else if fileMap, ok := file.(map[interface{}]interface{}); ok { - for k := range fileMap { - if fileName, ok := k.(string); ok { - s.Files = append(s.Files, File{Name: fileName}) - } - } - } - } - } - - return nil -} - -// UnmarshalYAML unmarshalls given yaml data into NIM manifest struct -func (f *File) UnmarshalYAML(unmarshal func(interface{}) error) error { - var name string - if err := unmarshal(&name); err != nil { - return err - } - f.Name = name - return nil -} - -// ParseModelManifest parses the given NIM manifest yaml file -func ParseModelManifest(filePath string) (*NIMManifest, error) { - data, err := os.ReadFile(filePath) - if err != nil { - return nil, err - } - - var config NIMManifest - err = yaml.Unmarshal(data, &config) - if err != nil { - return nil, err - } - return &config, nil -} - -// ParseModelManifestFromRawOutput parses the given raw NIM manifest data -func ParseModelManifestFromRawOutput(data []byte) (*NIMManifest, error) { - var config NIMManifest - err := yaml.Unmarshal(data, &config) - if err != nil { - return nil, err - } - return &config, nil +type NIMSchemaManifest struct { + SchemaVersion string `yaml:"schema_version" json:"schema_version,omitempty"` 
} -// MatchProfiles matches the given model parameters with the profiles in the manifest -func MatchProfiles(modelSpec appsv1alpha1.ModelSpec, manifest NIMManifest, discoveredGPUs []string) ([]string, error) { - var selectedProfiles []string - - for hash, profile := range manifest { - // Check precision, tensor parallelism, and QoS profile - if (modelSpec.Precision != "" && profile.Tags["precision"] != modelSpec.Precision) || - (modelSpec.TensorParallelism != "" && profile.Tags["tp"] != modelSpec.TensorParallelism) || - (modelSpec.QoSProfile != "" && profile.Tags["profile"] != modelSpec.QoSProfile) { - continue - } - - // Check LoRA configuration - if modelSpec.Lora == nil && profile.Tags["feat_lora"] == "true" { - continue - } - if modelSpec.Lora != nil && profile.Tags["feat_lora"] != strconv.FormatBool(*modelSpec.Lora) { - continue - } - - // Determine backend type - backend := profile.Tags["llm_engine"] - if backend == "" { - backend = profile.Tags["backend"] - } - - if modelSpec.Engine != "" && !strings.Contains(backend, strings.TrimSuffix(modelSpec.Engine, "_llm")) { - continue - } - - // Perform GPU match only when optimized engine is selected or GPU filters are provided - if isOptimizedEngine(modelSpec.Engine) || len(modelSpec.GPUs) > 0 { - // Skip non optimized profiles - if !isOptimizedEngine(backend) { - continue - } - if len(modelSpec.GPUs) > 0 || len(discoveredGPUs) > 0 { - if !matchGPUProfile(modelSpec, profile, discoveredGPUs) { - continue - } - } - } - - // Profile matched the given model parameters, add hash to the selected profiles - selectedProfiles = append(selectedProfiles, hash) - } - - return selectedProfiles, nil +type NIMParserInterface interface { + ParseModelManifest(filePath string) (NIMManifestInterface, error) + ParseModelManifestFromRawOutput(data []byte) (NIMManifestInterface, error) } -func isOptimizedEngine(engine string) bool { - return engine != "" && strings.Contains(strings.ToLower(engine), BackendTypeTensorRT) -} - -func 
matchGPUProfile(modelSpec appsv1alpha1.ModelSpec, profile NIMProfile, discoveredGPUs []string) bool { - foundGPU := false - - for _, gpu := range modelSpec.GPUs { - // Check for GPU product match - if gpu.Product != "" { - // Check if the product matches the "gpu" tag - if strings.Contains(strings.ToLower(profile.Tags["gpu"]), strings.ToLower(gpu.Product)) { - foundGPU = true - } - - // Check if the product matches the "key" tag - if strings.Contains(strings.ToLower(profile.Tags["key"]), strings.ToLower(gpu.Product)) { - foundGPU = true - } - - // If the GPU product matches, check the GPU IDs - if foundGPU && len(gpu.IDs) > 0 { - foundID := false - for _, id := range gpu.IDs { - if id == strings.TrimSuffix(profile.Tags["gpu_device"], ":10de") { - foundID = true - break - } - } - - // If the GPU product matches but no IDs match, return false - if !foundID { - return false - } - } - } - } - - // If a GPU product was matched and IDs (if any) also matched, return true - if foundGPU { - return true - } - - // If no match was found in the specified GPUs, check the discovered GPUs - for _, productLabel := range discoveredGPUs { - if productLabel != "" { - // match for llm nim format - if strings.Contains(strings.ToLower(productLabel), strings.ToLower(profile.Tags["gpu"])) { - return true - } - // match for non-llm nim format - if matches, _ := matchesRegex(productLabel, profile.Tags["product_name_regex"]); matches { - return true - } - } - } - - // If no match found in both specified and discovered GPUs, return false - return false -} - -func matchesRegex(productLabel, regexPattern string) (bool, error) { - // If regexPattern is empty, return false - if regexPattern == "" { - return false, nil - } - - // Compile the regex pattern - regex, err := regexp.Compile(regexPattern) - if err != nil { - return false, err - } - - // Check if the productLabel matches the regex - return regex.MatchString(productLabel), nil +type NIMManifestInterface interface { + 
MatchProfiles(modelSpec appsv1alpha1.ModelSpec, discoveredGPUs []string) ([]string, error) + GetProfilesList() []string + GetProfileModel(profileID string) string + GetProfileTags(profileID string) map[string]string + GetProfileRelease(profileID string) string } diff --git a/internal/nimparser/utils/utils.go b/internal/nimparser/utils/utils.go new file mode 100644 index 00000000..1f3b582d --- /dev/null +++ b/internal/nimparser/utils/utils.go @@ -0,0 +1,52 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "regexp" + "strings" + + nimparser "github.com/NVIDIA/k8s-nim-operator/internal/nimparser" + nimparserv1 "github.com/NVIDIA/k8s-nim-operator/internal/nimparser/v1" + nimparserv2 "github.com/NVIDIA/k8s-nim-operator/internal/nimparser/v2" + "gopkg.in/yaml.v2" +) + +// GetNIMParser unmarshals the provided byte slice into a NIMSchemaManifest struct and returns +// the corresponding NIMParserInterface implementation based on the schema version. +// If the unmarshalling fails or the schema version is not recognized, it returns a NIMParser from v1. +// +// Parameters: +// - data: A byte slice containing the YAML data to be unmarshaled. +// +// Returns: +// - A NIMParserInterface implementation based on the schema version. 
+func GetNIMParser(data []byte) nimparser.NIMParserInterface { + var config nimparser.NIMSchemaManifest + err := yaml.Unmarshal(data, &config) + if err != nil { + return nimparserv1.NIMParser{} + } else { + schemaVersion := strings.TrimSpace(config.SchemaVersion) + + match, _ := regexp.MatchString("^2(\\.|$)", schemaVersion) + if match { + return nimparserv2.NIMParser{} + } + } + return nimparserv1.NIMParser{} +} diff --git a/internal/nimparser/v1/nimparser.go b/internal/nimparser/v1/nimparser.go new file mode 100644 index 00000000..5aea6f7d --- /dev/null +++ b/internal/nimparser/v1/nimparser.go @@ -0,0 +1,274 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1 + +import ( + "maps" + "os" + "regexp" + "slices" + "strconv" + "strings" + + appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1" + "github.com/NVIDIA/k8s-nim-operator/internal/nimparser" + "gopkg.in/yaml.v2" +) + +const ( + // BackendTypeTensorRT indicates tensorrt backend + BackendTypeTensorRT = "tensorrt" +) + +// File represents the model files +type File struct { + Name string `yaml:"name" json:"name,omitempty"` +} + +// Src represents model source +type Src struct { + RepoID string `yaml:"repo_id" json:"repo_id,omitempty"` + Files []File `yaml:"files" json:"files,omitempty"` +} + +// Component represents source and destination for model files +type Component struct { + Dst string `yaml:"dst" json:"dst,omitempty"` + Src Src `yaml:"src" json:"src,omitempty"` +} + +// Workspace represents workspace for model components +type Workspace struct { + Components []Component `yaml:"components" json:"components,omitempty"` +} + +// NIMProfile is the model profile supported by the NIM container +type NIMProfile struct { + Model string `yaml:"model" json:"model,omitempty"` + Release string `yaml:"release" json:"release,omitempty"` + Tags map[string]string `yaml:"tags" json:"tags,omitempty"` + ContainerURL string `yaml:"container_url" json:"container_url,omitempty"` + Workspace Workspace `yaml:"workspace" json:"workspace,omitempty"` +} + +// NIMManifest is the model manifest file +type NIMManifest map[string]NIMProfile + +// UnmarshalYAML is the custom unmarshal function for Src +func (s *Src) UnmarshalYAML(unmarshal func(interface{}) error) error { + var raw map[string]interface{} + if err := unmarshal(&raw); err != nil { + return err + } + + if repoID, ok := raw["repo_id"].(string); ok { + s.RepoID = repoID + } + + if files, ok := raw["files"].([]interface{}); ok { + for _, file := range files { + if fileStr, ok := file.(string); ok { + s.Files = append(s.Files, File{Name: fileStr}) + } else if fileMap, ok := 
file.(map[interface{}]interface{}); ok { + for k := range fileMap { + if fileName, ok := k.(string); ok { + s.Files = append(s.Files, File{Name: fileName}) + } + } + } + } + } + + return nil +} + +// UnmarshalYAML unmarshals given yaml data into NIM manifest struct +func (f *File) UnmarshalYAML(unmarshal func(interface{}) error) error { + var name string + if err := unmarshal(&name); err != nil { + return err + } + f.Name = name + return nil +} + +func (manifest NIMManifest) MatchProfiles(modelSpec appsv1alpha1.ModelSpec, discoveredGPUs []string) ([]string, error) { + // Select profiles matching the given model parameters. + var selectedProfiles []string + + for hash, profile := range manifest { + // Check precision, tensor parallelism, and QoS profile + if (modelSpec.Precision != "" && profile.Tags["precision"] != modelSpec.Precision) || + (modelSpec.TensorParallelism != "" && profile.Tags["tp"] != modelSpec.TensorParallelism) || + (modelSpec.QoSProfile != "" && profile.Tags["profile"] != modelSpec.QoSProfile) { + continue + } + + // Check LoRA configuration + if modelSpec.Lora == nil && profile.Tags["feat_lora"] == "true" { + continue + } + if modelSpec.Lora != nil && profile.Tags["feat_lora"] != strconv.FormatBool(*modelSpec.Lora) { + continue + } + + // Determine backend type + backend := profile.Tags["llm_engine"] + if backend == "" { + backend = profile.Tags["backend"] + } + + if modelSpec.Engine != "" && !strings.Contains(backend, strings.TrimSuffix(modelSpec.Engine, "_llm")) { + continue + } + + // Perform GPU match only when optimized engine is selected or GPU filters are provided + if isOptimizedEngine(modelSpec.Engine) || len(modelSpec.GPUs) > 0 { + // Skip non optimized profiles + if !isOptimizedEngine(backend) { + continue + } + if len(modelSpec.GPUs) > 0 || len(discoveredGPUs) > 0 { + if !matchGPUProfile(modelSpec, profile, discoveredGPUs) { + continue + } + } + } + + // Profile matched the given model parameters, add hash to the selected profiles + selectedProfiles = 
append(selectedProfiles, hash) + } + + return selectedProfiles, nil +} + +func (manifest NIMManifest) GetProfilesList() []string { + keys := slices.Collect(maps.Keys(manifest)) + return keys +} +func (manifest NIMManifest) GetProfileModel(profileID string) string { + return manifest[profileID].Model +} +func (manifest NIMManifest) GetProfileTags(profileID string) map[string]string { + return manifest[profileID].Tags +} +func (manifest NIMManifest) GetProfileRelease(profileID string) string { + return manifest[profileID].Release +} + +func isOptimizedEngine(engine string) bool { + return engine != "" && strings.Contains(strings.ToLower(engine), BackendTypeTensorRT) +} + +func matchGPUProfile(modelSpec appsv1alpha1.ModelSpec, profile NIMProfile, discoveredGPUs []string) bool { + foundGPU := false + + for _, gpu := range modelSpec.GPUs { + // Check for GPU product match + if gpu.Product != "" { + // Check if the product matches the "gpu" tag + if strings.Contains(strings.ToLower(profile.Tags["gpu"]), strings.ToLower(gpu.Product)) { + foundGPU = true + } + + // Check if the product matches the "key" tag + if strings.Contains(strings.ToLower(profile.Tags["key"]), strings.ToLower(gpu.Product)) { + foundGPU = true + } + + // If the GPU product matches, check the GPU IDs + if foundGPU && len(gpu.IDs) > 0 { + foundID := false + for _, id := range gpu.IDs { + if id == strings.TrimSuffix(profile.Tags["gpu_device"], ":10de") { + foundID = true + break + } + } + + // If the GPU product matches but no IDs match, return false + if !foundID { + return false + } + } + } + } + + // If a GPU product was matched and IDs (if any) also matched, return true + if foundGPU { + return true + } + + // If no match was found in the specified GPUs, check the discovered GPUs + for _, productLabel := range discoveredGPUs { + if productLabel != "" { + // match for llm nim format + if strings.Contains(strings.ToLower(productLabel), strings.ToLower(profile.Tags["gpu"])) { + return true + } + // 
match for non-llm nim format + if matches, _ := matchesRegex(productLabel, profile.Tags["product_name_regex"]); matches { + return true + } + } + } + + // If no match found in both specified and discovered GPUs, return false + return false +} + +func matchesRegex(productLabel, regexPattern string) (bool, error) { + // If regexPattern is empty, return false + if regexPattern == "" { + return false, nil + } + + // Compile the regex pattern + regex, err := regexp.Compile(regexPattern) + if err != nil { + return false, err + } + + // Check if the productLabel matches the regex + return regex.MatchString(productLabel), nil +} + +type NIMParser struct{} + +func (NIMParser) ParseModelManifest(filePath string) (nimparser.NIMManifestInterface, error) { + data, err := os.ReadFile(filePath) + if err != nil { + return nil, err + } + + var config NIMManifest + err = yaml.Unmarshal(data, &config) + if err != nil { + return nil, err + } + return config, nil + +} + +func (NIMParser) ParseModelManifestFromRawOutput(data []byte) (nimparser.NIMManifestInterface, error) { + var config NIMManifest + err := yaml.Unmarshal(data, &config) + if err != nil { + return nil, err + } + return config, nil +} diff --git a/internal/nimparser/nimparser_test.go b/internal/nimparser/v1/nimparser_test.go similarity index 72% rename from internal/nimparser/nimparser_test.go rename to internal/nimparser/v1/nimparser_test.go index d69796ff..b85b1597 100644 --- a/internal/nimparser/nimparser_test.go +++ b/internal/nimparser/v1/nimparser_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package nimparser +package v1 import ( "path/filepath" @@ -32,11 +32,12 @@ var _ = Describe("NIMParser", func() { It("should parse a model profile for trtllm engine files correctly", func() { filePath := filepath.Join("testdata", "manifest_trtllm.yaml") - config, err := ParseModelManifest(filePath) + nimparser := NIMParser{} + config, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - Expect(*config).To(HaveLen(1)) + nimManifest := config.(NIMManifest) - profile, exists := (*config)["03fdb4d11f01be10c31b00e7c0540e2835e89a0079b483ad2dd3c25c8cc29b61"] + profile, exists := (nimManifest)["03fdb4d11f01be10c31b00e7c0540e2835e89a0079b483ad2dd3c25c8cc29b61"] Expect(exists).To(BeTrue()) Expect(profile.Model).To(Equal("meta/llama3-70b-instruct")) Expect(profile.Tags["llm_engine"]).To(Equal("tensorrt_llm")) @@ -46,11 +47,12 @@ var _ = Describe("NIMParser", func() { It("should parse a model profile for vllm engine files correctly", func() { filePath := filepath.Join("testdata", "manifest_vllm.yaml") - config, err := ParseModelManifest(filePath) + nimparser := NIMParser{} + config, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - Expect(*config).To(HaveLen(1)) + nimManifest := config.(NIMManifest) - profile, exists := (*config)["0f3de1afe11d355e01657424a267fbaad19bfea3143a9879307c49aed8299db0"] + profile, exists := nimManifest["0f3de1afe11d355e01657424a267fbaad19bfea3143a9879307c49aed8299db0"] Expect(exists).To(BeTrue()) Expect(profile.Model).To(Equal("meta/llama3-70b-instruct")) Expect(profile.Tags["llm_engine"]).To(Equal("vllm")) @@ -60,11 +62,12 @@ var _ = Describe("NIMParser", func() { It("should parse a model profile with lora adapters correctly", func() { filePath := filepath.Join("testdata", "manifest_lora.yaml") - config, err := ParseModelManifest(filePath) + nimparser := NIMParser{} + config, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - 
Expect(*config).To(HaveLen(1)) + nimManifest := config.(NIMManifest) - profile, exists := (*config)["36fc1fa4fc35c1d54da115a39323080b08d7937dceb8ba47be44f4da0ec720ff"] + profile, exists := (nimManifest)["36fc1fa4fc35c1d54da115a39323080b08d7937dceb8ba47be44f4da0ec720ff"] Expect(exists).To(BeTrue()) Expect(profile.Model).To(Equal("meta/llama3-70b-instruct")) Expect(profile.Tags["feat_lora"]).To(Equal("true")) @@ -74,11 +77,12 @@ var _ = Describe("NIMParser", func() { }) It("should match model profiles with valid parameters", func() { filePath := filepath.Join("testdata", "manifest_trtllm.yaml") - config, err := ParseModelManifest(filePath) + nimparser := NIMParser{} + config, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - Expect(*config).To(HaveLen(1)) + nimManifest := config.(NIMManifest) - profile, exists := (*config)["03fdb4d11f01be10c31b00e7c0540e2835e89a0079b483ad2dd3c25c8cc29b61"] + profile, exists := (nimManifest)["03fdb4d11f01be10c31b00e7c0540e2835e89a0079b483ad2dd3c25c8cc29b61"] Expect(exists).To(BeTrue()) Expect(profile.Model).To(Equal("meta/llama3-70b-instruct")) Expect(profile.Tags["llm_engine"]).To(Equal("tensorrt_llm")) @@ -93,16 +97,17 @@ var _ = Describe("NIMParser", func() { IDs: []string{"26b5"}}, }, } - matchedProfiles, err := MatchProfiles(modelSpec, *config, nil) + matchedProfiles, err := nimManifest.MatchProfiles(modelSpec, nil) Expect(err).NotTo(HaveOccurred()) Expect(matchedProfiles).NotTo(BeEmpty()) Expect(matchedProfiles).To(HaveLen(1)) }) It("should not match model profiles with invalid parameters", func() { filePath := filepath.Join("testdata", "manifest_trtllm.yaml") - config, err := ParseModelManifest(filePath) + nimparser := NIMParser{} + config, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - Expect(*config).To(HaveLen(1)) + nimManifest := config.(NIMManifest) modelSpec := appsv1alpha1.ModelSpec{Precision: "fp16", Engine: "tensorrt_llm", @@ -112,15 +117,16 @@ var _ = 
Describe("NIMParser", func() { IDs: []string{"abcd"}}, // invalid entry }, } - matchedProfiles, err := MatchProfiles(modelSpec, *config, nil) + matchedProfiles, err := nimManifest.MatchProfiles(modelSpec, nil) Expect(err).NotTo(HaveOccurred()) Expect(matchedProfiles).To(BeEmpty()) }) It("should match model profiles using automatically discovered GPUs", func() { filePath := filepath.Join("testdata", "manifest_trtllm.yaml") - config, err := ParseModelManifest(filePath) + nimparser := NIMParser{} + config, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - Expect(*config).To(HaveLen(1)) + nimManifest := config.(NIMManifest) // Match using discovered GPUs (node product labels) modelSpec := appsv1alpha1.ModelSpec{Precision: "fp16", @@ -128,64 +134,68 @@ var _ = Describe("NIMParser", func() { QoSProfile: "throughput", TensorParallelism: "8", } - matchedProfiles, err := MatchProfiles(modelSpec, *config, []string{"NVIDIA-L40S-48C"}) + matchedProfiles, err := nimManifest.MatchProfiles(modelSpec, []string{"NVIDIA-L40S-48C"}) Expect(err).NotTo(HaveOccurred()) Expect(matchedProfiles).NotTo(BeEmpty()) Expect(matchedProfiles).To(HaveLen(1)) }) It("should match model profiles when lora is enabled", func() { filePath := filepath.Join("testdata", "manifest_lora.yaml") - config, err := ParseModelManifest(filePath) + nimparser := NIMParser{} + config, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - Expect(*config).To(HaveLen(1)) + nimManifest := config.(NIMManifest) // Match using Lora modelSpec := appsv1alpha1.ModelSpec{ Lora: utils.BoolPtr(true), } - matchedProfiles, err := MatchProfiles(modelSpec, *config, nil) + matchedProfiles, err := nimManifest.MatchProfiles(modelSpec, nil) Expect(err).NotTo(HaveOccurred()) Expect(matchedProfiles).To(HaveLen(1)) }) It("should not match model profiles when lora is not provided and profile has lora enabled", func() { filePath := filepath.Join("testdata", "manifest_lora.yaml") - 
config, err := ParseModelManifest(filePath) + nimparser := NIMParser{} + config, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - Expect(*config).To(HaveLen(1)) + nimManifest := config.(NIMManifest) modelSpec := appsv1alpha1.ModelSpec{ Engine: "tensorrt_llm", } - matchedProfiles, err := MatchProfiles(modelSpec, *config, nil) + matchedProfiles, err := nimManifest.MatchProfiles(modelSpec, nil) Expect(err).NotTo(HaveOccurred()) Expect(matchedProfiles).To(BeEmpty()) }) It("should match model profiles with different engine parameters for non-llm manifest", func() { filePath := filepath.Join("testdata", "manifest_non_llm.yaml") - config, err := ParseModelManifest(filePath) + nimparser := NIMParser{} + config, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - Expect(*config).To(HaveLen(2)) + nimManifest := config.(NIMManifest) // Match using backend modelSpec := appsv1alpha1.ModelSpec{ Engine: "tensorrt", // instead of tensorrt_llm for llm nims } - matchedProfiles, err := MatchProfiles(modelSpec, *config, []string{"NVIDIA-A10G"}) + matchedProfiles, err := nimManifest.MatchProfiles(modelSpec, []string{"NVIDIA-A10G"}) Expect(err).NotTo(HaveOccurred()) Expect(matchedProfiles).To(HaveLen(1)) }) It("should match model profiles with different gpu parameters for non-llm manifest", func() { filePath := filepath.Join("testdata", "manifest_non_llm.yaml") - config, err := ParseModelManifest(filePath) + nimparser := NIMParser{} + config, err := nimparser.ParseModelManifest(filePath) Expect(err).NotTo(HaveOccurred()) - Expect(*config).To(HaveLen(2)) + nimManifest := config.(NIMManifest) // Match using GPU product name modelSpec := appsv1alpha1.ModelSpec{ GPUs: []appsv1alpha1.GPUSpec{{Product: "A10G"}}, } - matchedProfiles, err := MatchProfiles(modelSpec, *config, nil) + matchedProfiles, err := nimManifest.MatchProfiles(modelSpec, nil) Expect(err).NotTo(HaveOccurred()) Expect(matchedProfiles).To(HaveLen(1)) }) diff --git 
a/internal/nimparser/v2/nimparser.go b/internal/nimparser/v2/nimparser.go new file mode 100644 index 00000000..675516f3 --- /dev/null +++ b/internal/nimparser/v2/nimparser.go @@ -0,0 +1,243 @@ +/* +Copyright 2024. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v2 + +import ( + "os" + "regexp" + "strconv" + "strings" + + appsv1alpha1 "github.com/NVIDIA/k8s-nim-operator/api/apps/v1alpha1" + "github.com/NVIDIA/k8s-nim-operator/internal/nimparser" + "gopkg.in/yaml.v2" +) + +const ( + // BackendTypeTensorRT indicates tensortt backend + BackendTypeTensorRT = "tensorrt" +) + +// Uri represents model source +type Uri struct { + Uri string `yaml:"uri" json:"uri,omitempty"` +} + +// Workspace represents workspace for model components +type Workspace struct { + Files map[string]Uri `yaml:"files" json:"files,omitempty"` +} + +// NIMProfile is the model profile supported by the NIM container +type NIMProfile struct { + ID string `yaml:"id" json:"id,omitempty"` + Tags map[string]string `yaml:"tags" json:"tags,omitempty"` + Workspace Workspace `yaml:"workspace" json:"workspace,omitempty"` +} + +// NIMManifest is the model manifest file +type NIMManifest struct { + SchemaVersion string `yaml:"schema_version" json:"schema_version,omitempty"` + ProfileSelectionCriteria string `yaml:"profile_selection_criteria" json:"profile_selection_criteria,omitempty"` + Profiles []NIMProfile `yaml:"profiles" json:"profiles,omitempty"` +} + +func (manifest NIMManifest) MatchProfiles(modelSpec 
appsv1alpha1.ModelSpec, discoveredGPUs []string) ([]string, error) { + //TODO implement me + var selectedProfiles []string + + for _, profile := range manifest.Profiles { + // Check precision, tensor parallelism, and QoS profile + if (modelSpec.Precision != "" && profile.Tags["precision"] != modelSpec.Precision) || + (modelSpec.TensorParallelism != "" && profile.Tags["tp"] != modelSpec.TensorParallelism) || + (modelSpec.QoSProfile != "" && profile.Tags["profile"] != modelSpec.QoSProfile) { + continue + } + + // Check LoRA configuration + if modelSpec.Lora == nil && profile.Tags["feat_lora"] == "true" { + continue + } + if modelSpec.Lora != nil && profile.Tags["feat_lora"] != strconv.FormatBool(*modelSpec.Lora) { + continue + } + + if modelSpec.Buildable == nil && profile.Tags["trtllm_buildable"] == "true" { + continue + } + + if modelSpec.Buildable != nil && profile.Tags["trtllm_buildable"] != strconv.FormatBool(*modelSpec.Buildable) { + continue + } + + // Determine backend type + backend := profile.Tags["llm_engine"] + if backend == "" { + backend = profile.Tags["backend"] + } + + if modelSpec.Engine != "" && !strings.Contains(backend, strings.TrimSuffix(modelSpec.Engine, "_llm")) { + continue + } + + // Perform GPU match only when optimized engine is selected or GPU filters are provided + if profile.Tags["trtllm_buildable"] != "true" && (isOptimizedEngine(modelSpec.Engine) || len(modelSpec.GPUs) > 0) { + // Skip non optimized profiles + if !isOptimizedEngine(backend) { + continue + } + + if len(modelSpec.GPUs) > 0 || len(discoveredGPUs) > 0 { + if !matchGPUProfile(modelSpec, profile, discoveredGPUs) { + continue + } + } + } + + // Profile matched the given model parameters, add hash to the selected profiles + selectedProfiles = append(selectedProfiles, profile.ID) + } + + return selectedProfiles, nil +} + +func (manifest NIMManifest) GetProfilesList() []string { + + profileIDs := make([]string, len(manifest.Profiles)) + + for k, profile := range 
manifest.Profiles { + profileIDs[k] = profile.ID + } + return profileIDs +} +func (manifest NIMManifest) GetProfileModel(profileID string) string { + return "" +} +func (manifest NIMManifest) GetProfileTags(profileID string) map[string]string { + for _, profile := range manifest.Profiles { + if profileID == profile.ID { + return profile.Tags + } + } + return nil +} +func (manifest NIMManifest) GetProfileRelease(profileID string) string { + return "" +} + +func isOptimizedEngine(engine string) bool { + return engine != "" && strings.Contains(strings.ToLower(engine), BackendTypeTensorRT) +} + +func matchGPUProfile(modelSpec appsv1alpha1.ModelSpec, profile NIMProfile, discoveredGPUs []string) bool { + foundGPU := false + + for _, gpu := range modelSpec.GPUs { + // Check for GPU product match + if gpu.Product != "" { + // Check if the product matches the "gpu" tag + if strings.Contains(strings.ToLower(profile.Tags["gpu"]), strings.ToLower(gpu.Product)) { + foundGPU = true + } + + // Check if the product matches the "key" tag + if strings.Contains(strings.ToLower(profile.Tags["key"]), strings.ToLower(gpu.Product)) { + foundGPU = true + } + + // If the GPU product matches, check the GPU IDs + if foundGPU && len(gpu.IDs) > 0 { + foundID := false + for _, id := range gpu.IDs { + if id == strings.TrimSuffix(profile.Tags["gpu_device"], ":10de") { + foundID = true + break + } + } + + // If the GPU product matches but no IDs match, return false + if !foundID { + return false + } + } + } + } + + // If a GPU product was matched and IDs (if any) also matched, return true + if foundGPU { + return true + } + + // If no match was found in the specified GPUs, check the discovered GPUs + for _, productLabel := range discoveredGPUs { + if productLabel != "" { + // match for llm nim format + if strings.Contains(strings.ToLower(productLabel), strings.ToLower(profile.Tags["gpu"])) { + return true + } + // match for non-llm nim format + if matches, _ := matchesRegex(productLabel, 
profile.Tags["product_name_regex"]); matches { + return true + } + } + } + + // If no match found in both specified and discovered GPUs, return false + return false +} + +func matchesRegex(productLabel, regexPattern string) (bool, error) { + // If regexPattern is empty, return false + if regexPattern == "" { + return false, nil + } + + // Compile the regex pattern + regex, err := regexp.Compile(regexPattern) + if err != nil { + return false, err + } + + // Check if the productLabel matches the regex + return regex.MatchString(productLabel), nil +} + +type NIMParser struct{} + +func (NIMParser) ParseModelManifest(filePath string) (nimparser.NIMManifestInterface, error) { + data, err := os.ReadFile(filePath) + if err != nil { + return nil, err + } + + var config NIMManifest + err = yaml.Unmarshal(data, &config) + if err != nil { + return nil, err + } + return config, nil + +} + +func (NIMParser) ParseModelManifestFromRawOutput(data []byte) (nimparser.NIMManifestInterface, error) { + var config NIMManifest + err := yaml.Unmarshal(data, &config) + if err != nil { + return nil, err + } + return config, nil +}