diff --git a/charts/azimuth-chat/ci/ui-only-values.yaml b/charts/azimuth-chat/ci/ui-only-values.yaml
deleted file mode 100644
index b66347d..0000000
--- a/charts/azimuth-chat/ci/ui-only-values.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-azimuth-llm:
-  api:
-    enabled: false
-  ui:
-    service:
-      zenith:
-        enabled: false
-    appSettings:
-      # Verify that we can set non-standard LLM params
-      llm_params:
-        max_tokens: 101
-        temperature: 0.1
-        top_p: 0.15
-        top_k: 1
-        presence_penalty: 0.9
-        frequency_penalty: 1
diff --git a/charts/azimuth-llm/values.yaml b/charts/azimuth-llm/values.yaml
index 29879d4..cd483c1 100644
--- a/charts/azimuth-llm/values.yaml
+++ b/charts/azimuth-llm/values.yaml
@@ -56,8 +56,7 @@ api:
     hostPath:
       path: /tmp/llm/huggingface-cache
   # Number of gpus to requests for each api pod instance
-  # NOTE: This must be in the range 1 <= value <= N, where
-  # 'N' is the number of GPUs available in a single
+  # NOTE: This must be less than the number of GPUs available in a single
   # worker node on the target Kubernetes cluster.
   # NOTE: According to the vLLM docs found here
   # https://docs.vllm.ai/en/latest/serving/distributed_serving.html
diff --git a/web-apps/chat/defaults.yml b/web-apps/chat/defaults.yml
index b0260a9..b9ce61f 100644
--- a/web-apps/chat/defaults.yml
+++ b/web-apps/chat/defaults.yml
@@ -1,9 +1,3 @@
-
-# Default target is a local ollama instance
-# running inside the same docker network
-model_name: smollm2:135m
-backend_url: http://ollama:11434
-
 host_address: 0.0.0.0
 model_instruction: "You are a helpful and cheerful AI assistant. Please respond appropriately."