From 39d5dd2e75cacd287ce041811c1aa3e44982c4ea Mon Sep 17 00:00:00 2001 From: djl11 Date: Wed, 19 Feb 2025 01:47:27 +0700 Subject: [PATCH] moved routing sections into Universal API section. --- mint.json | 10 ++-------- routing/overview.mdx | 20 ------------------- {routing => universal_api}/fallbacks.mdx | 0 .../routing.mdx | 2 +- 4 files changed, 3 insertions(+), 29 deletions(-) delete mode 100644 routing/overview.mdx rename {routing => universal_api}/fallbacks.mdx (100%) rename routing/routing_syntax.mdx => universal_api/routing.mdx (99%) diff --git a/mint.json b/mint.json index 0e0091f90..7dc6184b1 100644 --- a/mint.json +++ b/mint.json @@ -97,6 +97,8 @@ "universal_api/supported_arguments", "universal_api/prompts", "universal_api/responses", + "universal_api/fallbacks", + "universal_api/routing", "universal_api/fine_tuning", "universal_api/local_models", "universal_api/logging", @@ -105,14 +107,6 @@ "universal_api/chatbot" ] }, - { - "group": "Routing", - "pages": [ - "routing/overview", - "routing/fallbacks", - "routing/routing_syntax" - ] - }, "on_prem/overview" ] }, diff --git a/routing/overview.mdx b/routing/overview.mdx deleted file mode 100644 index 33f58b88e..000000000 --- a/routing/overview.mdx +++ /dev/null @@ -1,20 +0,0 @@ ---- -title: 'Overview' ---- - -LLM routing allows you to be flexible about which model, provider and endpoint handles -each prompt. Flexibility can be advantageous for several reasons: - -1. Small models are (in general) faster and cheaper, whereas bigger models are more capable. -2. Tasks often exhibit a range of difficulties, suitable for varying LLM capability -3. Different providers have different latencies, and these **change** over time. -4. New models come out every week, each having different strengths and weaknesses. - -

- -LLM routing provides: - -- Faster and cheaper responses when a smaller model is capable of answering -- Continuous improvement: ‘riding the wave’ of new model releases -- Ability to maximise throughput or minimise latency based on live runtime statistics -- Reliability via fallbacks, if providers go down or latency limits are hit diff --git a/routing/fallbacks.mdx b/universal_api/fallbacks.mdx similarity index 100% rename from routing/fallbacks.mdx rename to universal_api/fallbacks.mdx diff --git a/routing/routing_syntax.mdx b/universal_api/routing.mdx similarity index 99% rename from routing/routing_syntax.mdx rename to universal_api/routing.mdx index b967fafd2..4af42925a 100644 --- a/routing/routing_syntax.mdx +++ b/universal_api/routing.mdx @@ -1,5 +1,5 @@ --- -title: 'Routing Syntax' +title: 'Routing' --- In the previous section, we introduced *fallbacks*, which enable you to specify what to