Skip to content

Commit 52d8fea

Browse files
feat: ai-rate-limiting plugin (#12037)
1 parent b4980df commit 52d8fea

File tree

10 files changed

+907
-12
lines changed

10 files changed

+907
-12
lines changed

apisix/cli/config.lua

+3
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ local _M = {
160160
["plugin-limit-req-redis-cluster-slot-lock"] = "1m",
161161
["plugin-limit-count-redis-cluster-slot-lock"] = "1m",
162162
["plugin-limit-conn-redis-cluster-slot-lock"] = "1m",
163+
["plugin-ai-rate-limiting"] = "10m",
164+
["plugin-ai-rate-limiting-reset-header"] = "10m",
163165
tracing_buffer = "10m",
164166
["plugin-api-breaker"] = "10m",
165167
["etcd-cluster-health-check"] = "10m",
@@ -219,6 +221,7 @@ local _M = {
219221
"ai-prompt-decorator",
220222
"ai-prompt-guard",
221223
"ai-rag",
224+
"ai-rate-limiting",
222225
"ai-proxy-multi",
223226
"ai-proxy",
224227
"ai-aws-content-moderation",

apisix/cli/ngx_tpl.lua

+13-1
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,19 @@ http {
287287
lua_shared_dict tars {* http.lua_shared_dict["tars"] *};
288288
{% end %}
289289
290+
291+
{% if http.lua_shared_dict["plugin-ai-rate-limiting"] then %}
292+
lua_shared_dict plugin-ai-rate-limiting {* http.lua_shared_dict["plugin-ai-rate-limiting"] *};
293+
{% else %}
294+
lua_shared_dict plugin-ai-rate-limiting 10m;
295+
{% end %}
296+
297+
# guard on the same key that is interpolated below, so the directive is
# never rendered with an empty size
{% if http.lua_shared_dict["plugin-ai-rate-limiting-reset-header"] then %}
lua_shared_dict plugin-ai-rate-limiting-reset-header {* http.lua_shared_dict["plugin-ai-rate-limiting-reset-header"] *};
{% else %}
lua_shared_dict plugin-ai-rate-limiting-reset-header 10m;
{% end %}
302+
290303
{% if enabled_plugins["limit-conn"] then %}
291304
lua_shared_dict plugin-limit-conn {* http.lua_shared_dict["plugin-limit-conn"] *};
292305
lua_shared_dict plugin-limit-conn-redis-cluster-slot-lock {* http.lua_shared_dict["plugin-limit-conn-redis-cluster-slot-lock"] *};
@@ -418,7 +431,6 @@ http {
418431
{% if ssl.ssl_trusted_certificate ~= nil then %}
419432
lua_ssl_trusted_certificate {* ssl.ssl_trusted_certificate *};
420433
{% end %}
421-
422434
# http configuration snippet starts
423435
{% if http_configuration_snippet then %}
424436
{* http_configuration_snippet *}

apisix/plugins/ai-rate-limiting.lua

+209
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
--
2+
-- Licensed to the Apache Software Foundation (ASF) under one or more
3+
-- contributor license agreements. See the NOTICE file distributed with
4+
-- this work for additional information regarding copyright ownership.
5+
-- The ASF licenses this file to You under the Apache License, Version 2.0
6+
-- (the "License"); you may not use this file except in compliance with
7+
-- the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing, software
12+
-- distributed under the License is distributed on an "AS IS" BASIS,
13+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
-- See the License for the specific language governing permissions and
15+
-- limitations under the License.
16+
--
17+
local require = require
18+
local setmetatable = setmetatable
19+
local ipairs = ipairs
20+
local type = type
21+
local core = require("apisix.core")
22+
local limit_count = require("apisix.plugins.limit-count.init")
23+
24+
local plugin_name = "ai-rate-limiting"
25+
26+
-- JSON schema for a single entry of the `instances` array: each entry must
-- provide a name together with its own `limit` and `time_window`.
local instance_limit_schema = {
    type = "object",
    required = {"name", "limit", "time_window"},
    properties = {
        name = {
            type = "string",
        },
        limit = {
            type = "integer",
            minimum = 1,
        },
        time_window = {
            type = "integer",
            minimum = 1,
        },
    },
}
35+
36+
-- Plugin configuration schema. `limit` and `time_window` are mandatory;
-- `instances` may carry per-instance values that transform_limit_conf uses
-- in place of the plugin-level ones.
local schema = {
    type = "object",
    required = {"limit", "time_window"},
    properties = {
        limit = {
            type = "integer",
            exclusiveMinimum = 0,
        },
        time_window = {
            type = "integer",
            exclusiveMinimum = 0,
        },
        show_limit_quota_header = {
            type = "boolean",
            default = true,
        },
        -- names the field of ctx.ai_token_usage that get_token_usage reads
        limit_strategy = {
            type = "string",
            enum = {"total_tokens", "prompt_tokens", "completion_tokens"},
            default = "total_tokens",
            description = "The strategy to limit the tokens",
        },
        instances = {
            type = "array",
            items = instance_limit_schema,
        },
        rejected_code = {
            type = "integer",
            minimum = 200,
            maximum = 599,
            default = 503,
        },
        rejected_msg = {
            type = "string",
            minLength = 1,
        },
    },
}
61+
62+
-- Module table returned at the end of the file; carries the plugin's
-- name, version, priority and configuration schema.
local _M = {
    name = plugin_name,
    version = 0.1,
    priority = 1030,
    schema = schema,
}
68+
69+
-- Cache used to look up the per-instance limit-conf table built by
-- fetch_limit_conf_kvs; it is invoked with the plugin conf as the key.
local limit_conf_cache = core.lrucache.new({
    count = 512,
    ttl = 300,
})
72+
73+
74+
--- Validate a plugin configuration against `schema`.
-- @param conf plugin configuration table to validate
-- @return whatever core.schema.check returns, forwarded unchanged
function _M.check_schema(conf)
    -- tail call on purpose: every return value is passed through as-is
    return core.schema.check(schema, conf)
end
77+
78+
79+
--- Build the conf table that is handed to limit_count.rate_limit.
-- When `instance_conf` is given, its name/limit/time_window are used and the
-- instance name doubles as the counter key. Otherwise the plugin-level
-- limit/time_window are used under the shared "<plugin_name>#global" key,
-- and `instance_name` (or "") only affects the header names.
-- @param plugin_conf   plugin configuration
-- @param instance_conf optional entry of plugin_conf.instances
-- @param instance_name optional name used when instance_conf is absent
-- @return table conf for limit-count
local function transform_limit_conf(plugin_conf, instance_conf, instance_name)
    local source, key, header_suffix
    if instance_conf then
        source = instance_conf
        key = instance_conf.name
        header_suffix = instance_conf.name
    else
        source = plugin_conf
        key = plugin_name .. "#global"
        header_suffix = instance_name or ""
    end

    local limit_conf = {
        _vid = key,

        key = key,
        count = source.limit,
        time_window = source.time_window,
        -- rejection behaviour and header visibility always come from the
        -- plugin-level conf, never from the instance entry
        rejected_code = plugin_conf.rejected_code,
        rejected_msg = plugin_conf.rejected_msg,
        show_limit_quota_header = plugin_conf.show_limit_quota_header,

        -- limit-count need these fields
        policy = "local",
        key_type = "constant",
        allow_degradation = false,
        sync_interval = -1,
    }

    limit_conf.limit_header = "X-AI-RateLimit-Limit-" .. header_suffix
    limit_conf.remaining_header = "X-AI-RateLimit-Remaining-" .. header_suffix
    limit_conf.reset_header = "X-AI-RateLimit-Reset-" .. header_suffix

    return limit_conf
end
110+
111+
112+
--- Build the instance-name -> limit-conf lookup table for a plugin conf.
-- Names listed in conf.instances are filled in eagerly; any other name is
-- resolved on first access to a plugin-level limit conf and then stored in
-- the table so later lookups hit the stored entry.
-- @param conf plugin configuration
-- @return table keyed by instance name
local function fetch_limit_conf_kvs(conf)
    local kvs = {}

    if conf.instances then
        for _, instance in ipairs(conf.instances) do
            kvs[instance.name] = transform_limit_conf(conf, instance)
        end
    end

    return setmetatable(kvs, {
        __index = function(tbl, instance_name)
            -- no instance-specific entry: fall back to the plugin-level limits
            local fallback = transform_limit_conf(conf, nil, instance_name)
            tbl[instance_name] = fallback
            return fallback
        end
    })
end
127+
128+
129+
--- Access-phase handler: check the picked AI instance against its limit.
-- Does nothing when ctx.picked_ai_instance_name is unset. Otherwise calls
-- limit_count.rate_limit with cost 1 and dry_run = true, records in
-- ctx.ai_rate_limiting whether a status code came back, and returns that
-- code and message.
-- @param conf plugin configuration
-- @param ctx  request context
-- @return code, msg as returned by limit_count.rate_limit
function _M.access(conf, ctx)
    local picked = ctx.picked_ai_instance_name
    if picked then
        local kvs = limit_conf_cache(conf, nil, fetch_limit_conf_kvs, conf)
        local code, msg = limit_count.rate_limit(kvs[picked], ctx, plugin_name, 1, true)
        -- coerce to a strict boolean; _M.log reads this flag
        ctx.ai_rate_limiting = not not code
        return code, msg
    end
end
141+
142+
143+
--- Report whether an AI instance currently passes its rate limit.
-- When `conf` is nil the plugin's conf is looked up in ctx.plugins; if the
-- plugin is not configured there, the instance is reported as available.
-- The check calls limit_count.rate_limit with cost 1 and dry_run = true.
-- @param conf          plugin configuration, or nil to look it up in ctx
-- @param ctx           request context
-- @param instance_name instance to check; defaults to
--                      ctx.picked_ai_instance_name
-- @return true when no conf is found or rate_limit returns no status code
-- @return false when rate_limit returns a status code
-- @return nil, err when the instance name is missing or not a string
function _M.check_instance_status(conf, ctx, instance_name)
    if conf == nil then
        -- NOTE(review): assumes ctx.plugins is APISIX's flat list of
        -- alternating entries {plugin_obj, plugin_conf, ...} -- confirm
        -- against apisix/plugin.lua. Under that layout the entry whose
        -- `.name` matches is the plugin object; the conf is the next slot.
        local plugins = ctx.plugins or {}
        for i = 1, #plugins, 2 do
            local plugin_obj = plugins[i]
            if plugin_obj and plugin_obj.name == plugin_name then
                conf = plugins[i + 1]
                break
            end
        end
    end
    if not conf then
        return true
    end

    instance_name = instance_name or ctx.picked_ai_instance_name
    if not instance_name then
        return nil, "missing instance_name"
    end

    if type(instance_name) ~= "string" then
        return nil, "invalid instance_name"
    end

    local limit_conf_kvs = limit_conf_cache(conf, nil, fetch_limit_conf_kvs, conf)
    local limit_conf = limit_conf_kvs[instance_name]
    local code, _ = limit_count.rate_limit(limit_conf, ctx, plugin_name, 1, true)
    if code then
        core.log.info("rate limit for instance: ", instance_name, " code: ", code)
        return false
    end
    return true
end
174+
175+
176+
--- Read the token count selected by conf.limit_strategy.
-- @param conf plugin configuration (reads conf.limit_strategy)
-- @param ctx  request context (reads ctx.ai_token_usage)
-- @return the matching entry of ctx.ai_token_usage, or nothing when
--         ctx.ai_token_usage is not set
local function get_token_usage(conf, ctx)
    local token_usage = ctx.ai_token_usage
    if token_usage then
        return token_usage[conf.limit_strategy]
    end
end
183+
184+
185+
--- Log-phase handler: charge the tokens used against the instance's limit.
-- Skipped when no instance was picked or when the access phase set
-- ctx.ai_rate_limiting (its rate_limit call returned a status code).
-- The cost passed to limit_count.rate_limit is the token count chosen by
-- conf.limit_strategy; an error is logged when that count is unavailable.
-- @param conf plugin configuration
-- @param ctx  request context
function _M.log(conf, ctx)
    local picked = ctx.picked_ai_instance_name
    if not picked or ctx.ai_rate_limiting then
        return
    end

    local tokens = get_token_usage(conf, ctx)
    if tokens then
        core.log.info("instance name: ", picked, " used tokens: ", tokens)

        local kvs = limit_conf_cache(conf, nil, fetch_limit_conf_kvs, conf)
        -- no dry_run argument here, unlike the access-phase check
        limit_count.rate_limit(kvs[picked], ctx, plugin_name, tokens)
        return
    end

    core.log.error("failed to get token usage for llm service")
end
207+
208+
209+
return _M

apisix/plugins/limit-count/init.lua

+20-10
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ local ipairs = ipairs
2121
local pairs = pairs
2222
local redis_schema = require("apisix.utils.redis-schema")
2323
local policy_to_additional_properties = redis_schema.schema
24+
local get_phase = ngx.get_phase
2425

2526
local limit_redis_cluster_new
2627
local limit_redis_new
@@ -233,8 +234,9 @@ local function gen_limit_obj(conf, ctx, plugin_name)
233234
return core.lrucache.plugin_ctx(lrucache, ctx, extra_key, create_limit_obj, conf, plugin_name)
234235
end
235236

236-
function _M.rate_limit(conf, ctx, name, cost)
237+
function _M.rate_limit(conf, ctx, name, cost, dry_run)
237238
core.log.info("ver: ", ctx.conf_version)
239+
core.log.info("conf: ", core.json.delay_encode(conf, true))
238240

239241
local lim, err = gen_limit_obj(conf, ctx, name)
240242

@@ -275,7 +277,7 @@ function _M.rate_limit(conf, ctx, name, cost)
275277

276278
local delay, remaining, reset
277279
if not conf.policy or conf.policy == "local" then
278-
delay, remaining, reset = lim:incoming(key, true, conf, cost)
280+
delay, remaining, reset = lim:incoming(key, not dry_run, conf, cost)
279281
else
280282
delay, remaining, reset = lim:incoming(key, cost)
281283
end
@@ -288,14 +290,22 @@ function _M.rate_limit(conf, ctx, name, cost)
288290
end
289291
core.log.info("limit-count plugin-metadata: ", core.json.delay_encode(metadata))
290292

293+
local set_limit_headers = {
294+
limit_header = conf.limit_header or metadata.limit_header,
295+
remaining_header = conf.remaining_header or metadata.remaining_header,
296+
reset_header = conf.reset_header or metadata.reset_header,
297+
}
298+
local phase = get_phase()
299+
local set_header = phase ~= "log"
300+
291301
if not delay then
292302
local err = remaining
293303
if err == "rejected" then
294304
-- show count limit header when rejected
295-
if conf.show_limit_quota_header then
296-
core.response.set_header(metadata.limit_header, conf.count,
297-
metadata.remaining_header, 0,
298-
metadata.reset_header, reset)
305+
if conf.show_limit_quota_header and set_header then
306+
core.response.set_header(set_limit_headers.limit_header, conf.count,
307+
set_limit_headers.remaining_header, 0,
308+
set_limit_headers.reset_header, reset)
299309
end
300310

301311
if conf.rejected_msg then
@@ -311,10 +321,10 @@ function _M.rate_limit(conf, ctx, name, cost)
311321
return 500, {error_msg = "failed to limit count"}
312322
end
313323

314-
if conf.show_limit_quota_header then
315-
core.response.set_header(metadata.limit_header, conf.count,
316-
metadata.remaining_header, remaining,
317-
metadata.reset_header, reset)
324+
if conf.show_limit_quota_header and set_header then
325+
core.response.set_header(set_limit_headers.limit_header, conf.count,
326+
set_limit_headers.remaining_header, remaining,
327+
set_limit_headers.reset_header, reset)
318328
end
319329
end
320330

conf/config.yaml.example

+1
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,7 @@ plugins: # plugin list (sorted by priority)
480480
- ai-prompt-decorator # priority: 1070
481481
- ai-prompt-guard # priority: 1072
482482
- ai-rag # priority: 1060
483+
- ai-rate-limiting # priority: 1030
483484
- ai-aws-content-moderation # priority: 1040 TODO: compare priority with other ai plugins
484485
- proxy-mirror # priority: 1010
485486
- proxy-rewrite # priority: 1008

docs/en/latest/config.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,8 @@
158158
"plugins/request-id",
159159
"plugins/proxy-control",
160160
"plugins/client-control",
161-
"plugins/workflow"
161+
"plugins/workflow",
162+
"plugins/ai-rate-limiting"
162163
]
163164
},
164165
{

0 commit comments

Comments
 (0)