Commit 706ef4d

fix: correct GLM-5.1 specs in prod/dev yaml
Research via the HF router (Together, Fireworks AI, and zai-org providers) and the model card confirms GLM-5.1 is 744B total / 40B active params (not 754B), with a 202K context window. Update both the GLM-5.1 and GLM-5.1-FP8 descriptions to reflect the accurate architecture. https://claude.ai/code/session_01AjLGLnaXowm91ymkX42wmN
1 parent 4e7ce75 commit 706ef4d

File tree

2 files changed: +4 −4 lines

chart/env/dev.yaml

Lines changed: 2 additions & 2 deletions
@@ -79,8 +79,8 @@ envVars:
   PUBLIC_LLM_ROUTER_ALIAS_ID: "omni"
   MODELS: >
     [
-      { "id": "zai-org/GLM-5.1", "description": "Upgraded 754B MoE for agentic coding, extended reasoning, and tool use.", "parameters": { "max_tokens": 32768 } },
-      { "id": "zai-org/GLM-5.1-FP8", "description": "FP8 GLM-5.1 for efficient agentic coding and reasoning inference.", "parameters": { "max_tokens": 32768 } },
+      { "id": "zai-org/GLM-5.1", "description": "Upgraded 744B MoE (40B active) with 202K context for agentic coding and reasoning.", "parameters": { "max_tokens": 32768 } },
+      { "id": "zai-org/GLM-5.1-FP8", "description": "FP8 GLM-5.1 744B MoE for fastest-throughput agentic coding and reasoning.", "parameters": { "max_tokens": 32768 } },
       { "id": "google/gemma-4-31B-it", "description": "Dense multimodal Gemma with 256K context, reasoning, and function calling." },
       { "id": "google/gemma-4-26B-A4B-it", "description": "Efficient multimodal MoE Gemma with 4B active params and 256K context." },
       { "id": "Qwen/Qwen3.5-9B", "description": "Dense multimodal hybrid with 262K context excelling at reasoning on-device." },

chart/env/prod.yaml

Lines changed: 2 additions & 2 deletions
@@ -89,8 +89,8 @@ envVars:
   PUBLIC_LLM_ROUTER_ALIAS_ID: "omni"
   MODELS: >
     [
-      { "id": "zai-org/GLM-5.1", "description": "Upgraded 754B MoE for agentic coding, extended reasoning, and tool use.", "parameters": { "max_tokens": 32768 } },
-      { "id": "zai-org/GLM-5.1-FP8", "description": "FP8 GLM-5.1 for efficient agentic coding and reasoning inference.", "parameters": { "max_tokens": 32768 } },
+      { "id": "zai-org/GLM-5.1", "description": "Upgraded 744B MoE (40B active) with 202K context for agentic coding and reasoning.", "parameters": { "max_tokens": 32768 } },
+      { "id": "zai-org/GLM-5.1-FP8", "description": "FP8 GLM-5.1 744B MoE for fastest-throughput agentic coding and reasoning.", "parameters": { "max_tokens": 32768 } },
       { "id": "google/gemma-4-31B-it", "description": "Dense multimodal Gemma with 256K context, reasoning, and function calling." },
       { "id": "google/gemma-4-26B-A4B-it", "description": "Efficient multimodal MoE Gemma with 4B active params and 256K context." },
       { "id": "Qwen/Qwen3.5-9B", "description": "Dense multimodal hybrid with 262K context excelling at reasoning on-device." },
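Both files feed the model list to the app through a single `MODELS` env var: a YAML folded block scalar (`>`) whose content is a JSON array. A minimal sketch of how a consumer might parse and sanity-check that value (the function and variable names here are illustrative assumptions, not the charts' actual consumption code):

```python
import json


def load_models(raw: str) -> list[dict]:
    """Parse a MODELS env var (a JSON array of model entries) and validate each entry."""
    models = json.loads(raw)
    for m in models:
        # Every entry in the diff carries an "id" and a "description".
        assert "id" in m and "description" in m, f"malformed entry: {m}"
        # "parameters" is optional; the GLM-5.1 entries use it to cap max_tokens.
        max_tokens = m.get("parameters", {}).get("max_tokens")
        if max_tokens is not None:
            assert isinstance(max_tokens, int) and max_tokens > 0
    return models


# Example using the corrected GLM-5.1 entry from this commit:
raw = (
    '[{ "id": "zai-org/GLM-5.1", '
    '"description": "Upgraded 744B MoE (40B active) with 202K context '
    'for agentic coding and reasoning.", '
    '"parameters": { "max_tokens": 32768 } }]'
)
models = load_models(raw)
print(models[0]["id"])  # zai-org/GLM-5.1
```

Note that the descriptions are display strings only; the parameter counts in them do not affect routing, which keys off `id`.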
