# t/plugin/ai-rate-limiting.t
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
use t::APISIX 'no_plan';

log_level("info");
repeat_each(1);
no_long_string();
no_root_location();

# Load the canned OpenAI-style response fixture used by the mock upstream
# defined in the block preprocessor below.
my $resp_file = 't/assets/ai-proxy-response.json';
open(my $fh, '<', $resp_file) or die "Could not open file '$resp_file' $!";
my $resp = do { local $/; <$fh> };
close($fh);
# NOTE: the stray debug prints ("Hello, World!" and the raw fixture dump)
# were removed — writing to stdout here pollutes the TAP stream that
# prove/Test::Nginx parses.
# Per-block preprocessor: supplies a default request and, unless a block
# provides its own "--- http_config", installs a mock OpenAI-compatible
# upstream listening on port 16724.
# NOTE(review): the heredoc below is emitted verbatim into the nginx
# configuration, so its content is intentionally left byte-for-byte
# unchanged here.
add_block_preprocessor(sub {
my ($block) = @_;
# Blocks without an explicit "--- request" section default to GET /t.
if (!defined $block->request) {
$block->set_value("request", "GET /t");
}
# Mock upstream summary (from the Lua handlers below):
#   /anything             - POST only; expects the literal body
#                           "SELECT * FROM STUDENTS" (passthrough check).
#   /v1/chat/completions  - POST only; with header "test-type: options" it
#                           checks body.foo == "bar"; otherwise it requires
#                           auth (header "Authorization: Bearer token" or
#                           query "apikey=apikey"), validates body.messages,
#                           and returns a canned completion echoing
#                           body.model (total_tokens = 10 per request).
#   /random               - plain path-override probe.
my $http_config = $block->http_config // <<_EOC_;
server {
server_name openai;
listen 16724;
default_type 'application/json';
location /anything {
content_by_lua_block {
local json = require("cjson.safe")
if ngx.req.get_method() ~= "POST" then
ngx.status = 400
ngx.say("Unsupported request method: ", ngx.req.get_method())
end
ngx.req.read_body()
local body = ngx.req.get_body_data()
if body ~= "SELECT * FROM STUDENTS" then
ngx.status = 503
ngx.say("passthrough doesn't work")
return
end
ngx.say('{"foo", "bar"}')
}
}
location /v1/chat/completions {
content_by_lua_block {
local json = require("cjson.safe")
if ngx.req.get_method() ~= "POST" then
ngx.status = 400
ngx.say("Unsupported request method: ", ngx.req.get_method())
end
ngx.req.read_body()
local body, err = ngx.req.get_body_data()
body, err = json.decode(body)
local test_type = ngx.req.get_headers()["test-type"]
if test_type == "options" then
if body.foo == "bar" then
ngx.status = 200
ngx.say("options works")
else
ngx.status = 500
ngx.say("model options feature doesn't work")
end
return
end
local header_auth = ngx.req.get_headers()["authorization"]
local query_auth = ngx.req.get_uri_args()["apikey"]
if header_auth ~= "Bearer token" and query_auth ~= "apikey" then
ngx.status = 401
ngx.say("Unauthorized")
return
end
if header_auth == "Bearer token" or query_auth == "apikey" then
ngx.req.read_body()
local body, err = ngx.req.get_body_data()
body, err = json.decode(body)
if not body.messages or #body.messages < 1 then
ngx.status = 400
ngx.say([[{ "error": "bad request"}]])
return
end
if body.messages[1].content == "write an SQL query to get all rows from student table" then
ngx.print("SELECT * FROM STUDENTS")
return
end
ngx.status = 200
ngx.say(string.format([[
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": { "content": "1 + 1 = 2.", "role": "assistant" }
}
],
"created": 1723780938,
"id": "chatcmpl-9wiSIg5LYrrpxwsr2PubSQnbtod1P",
"model": "%s",
"object": "chat.completion",
"system_fingerprint": "fp_abc28019ad",
"usage": { "completion_tokens": 5, "prompt_tokens": 8, "total_tokens": 10 }
}
]], body.model))
return
end
ngx.status = 503
ngx.say("reached the end of the test suite")
}
}
location /random {
content_by_lua_block {
ngx.say("path override works")
}
}
}
_EOC_
$block->set_value("http_config", $http_config);
});
run_tests();
__DATA__
=== TEST 1: sanity
--- config
location /t {
content_by_lua_block {
local configs = {
{
time_window = 60,
},
{
limit = 30,
},
{
limit = 30,
time_window = 60,
rejected_code = 199,
},
{
limit = 30,
time_window = 60,
limit_strategy = "invalid",
},
{
limit = 30,
time_window = 60,
instances = {
{
name = "instance1",
limit = 30,
time_window = 60,
},
{
limit = 30,
time_window = 60,
}
},
},
{
time_window = 60,
instances = {
{
name = "instance1",
limit = 30,
time_window = 60,
}
},
},
{
limit = 30,
instances = {
{
name = "instance1",
limit = 30,
time_window = 60,
}
},
},
{
instances = {
{
name = "instance1",
limit = 30,
time_window = 60,
}
},
},
{
limit = 30,
time_window = 60,
rejected_code = 403,
rejected_msg = "rate limit exceeded",
limit_strategy = "completion_tokens",
},
{
limit = 30,
time_window = 60,
instances = {
{
name = "instance1",
limit = 30,
time_window = 60,
}
},
}
}
local core = require("apisix.core")
local plugin = require("apisix.plugins.ai-rate-limiting")
for _, config in ipairs(configs) do
local ok, err = plugin.check_schema(config)
if not ok then
ngx.say(err)
else
ngx.say("passed")
end
end
ngx.say("done")
}
}
--- response_body
property "limit" is required when "time_window" is set
property "time_window" is required when "limit" is set
property "rejected_code" validation failed: expected 199 to be at least 200
property "limit_strategy" validation failed: matches none of the enum values
property "instances" validation failed: failed to validate item 2: property "name" is required
property "limit" is required when "time_window" is set
property "time_window" is required when "limit" is set
passed
passed
passed
done
=== TEST 2: set route 1, default limit_strategy: total_tokens
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/ai",
"plugins": {
"ai-proxy": {
"provider": "openai",
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "gpt-35-turbo-instruct",
"max_tokens": 512,
"temperature": 1.0
},
"override": {
"endpoint": "http://localhost:16724"
},
"ssl_verify": false
},
"ai-rate-limiting": {
"limit": 30,
"time_window": 60
}
},
"upstream": {
"type": "roundrobin",
"nodes": {
"canbeanything.com": 1
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 3: reject the 3rd request
--- pipelined_requests eval
[
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 503]
=== TEST 4: set rejected_code to 403, rejected_msg to "rate limit exceeded"
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/ai",
"plugins": {
"ai-proxy": {
"provider": "openai",
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "gpt-35-turbo-instruct",
"max_tokens": 512,
"temperature": 1.0
},
"override": {
"endpoint": "http://localhost:16724"
},
"ssl_verify": false
},
"ai-rate-limiting": {
"limit": 30,
"time_window": 60,
"rejected_code": 403,
"rejected_msg": "rate limit exceeded"
}
},
"upstream": {
"type": "roundrobin",
"nodes": {
"canbeanything.com": 1
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 5: check code and message
--- pipelined_requests eval
[
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 403]
--- response_body eval
[
qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
qr/\{"error_msg":"rate limit exceeded"\}/,
]
=== TEST 6: check rate limit headers
--- request
POST /ai
{ "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] }
--- more_headers
Authorization: Bearer token
--- response_headers
X-AI-RateLimit-Limit-ai-proxy: 30
X-AI-RateLimit-Remaining-ai-proxy: 29
X-AI-RateLimit-Reset-ai-proxy: 60
=== TEST 7: check rate limit headers after 4 requests
--- pipelined_requests eval
[
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 403]
--- response_headers eval
[
"X-AI-RateLimit-Remaining-ai-proxy: 29",
"X-AI-RateLimit-Remaining-ai-proxy: 19",
"X-AI-RateLimit-Remaining-ai-proxy: 9",
"X-AI-RateLimit-Remaining-ai-proxy: 0",
]
=== TEST 8: set route2 with limit_strategy: completion_tokens
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/2',
ngx.HTTP_PUT,
[[{
"uri": "/ai2",
"plugins": {
"ai-proxy": {
"provider": "openai",
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "gpt-35-turbo-instruct",
"max_tokens": 512,
"temperature": 1.0
},
"override": {
"endpoint": "http://localhost:16724"
},
"ssl_verify": false
},
"ai-rate-limiting": {
"limit": 20,
"time_window": 45,
"limit_strategy": "completion_tokens"
}
},
"upstream": {
"type": "roundrobin",
"nodes": {
"canbeanything.com": 1
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 9: reject the 5th request
--- pipelined_requests eval
[
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 503]
=== TEST 10: check rate limit headers
--- request
POST /ai2
{ "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] }
--- more_headers
Authorization: Bearer token
--- response_headers
X-AI-RateLimit-Limit-ai-proxy: 20
X-AI-RateLimit-Remaining-ai-proxy: 19
X-AI-RateLimit-Reset-ai-proxy: 45
=== TEST 11: multi-request
--- pipelined_requests eval
[
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 503]
--- response_headers eval
[
"X-AI-RateLimit-Remaining-ai-proxy: 19",
"X-AI-RateLimit-Remaining-ai-proxy: 14",
"X-AI-RateLimit-Remaining-ai-proxy: 9",
"X-AI-RateLimit-Remaining-ai-proxy: 4",
"X-AI-RateLimit-Remaining-ai-proxy: 0",
]
=== TEST 12: request route 1 and route 2
--- pipelined_requests eval
[
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 200, 200, 200, 403, 503]
=== TEST 13: ai-rate-limiting & ai-proxy-multi, with instance_health_and_rate_limiting strategy
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/ai",
"plugins": {
"ai-proxy-multi": {
"fallback_strategy": "instance_health_and_rate_limiting",
"instances": [
{
"name": "openai-gpt4",
"provider": "openai",
"weight": 1,
"priority": 1,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "gpt-4"
},
"override": {
"endpoint": "http://localhost:16724"
}
},
{
"name": "openai-gpt3",
"provider": "openai",
"weight": 1,
"priority": 0,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "gpt-3"
},
"override": {
"endpoint": "http://localhost:16724"
}
}
],
"ssl_verify": false
},
"ai-rate-limiting": {
"limit": 10,
"time_window": 60
}
},
"upstream": {
"type": "roundrobin",
"nodes": {
"canbeanything.com": 1
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 14: fallback strategy should work
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local core = require("apisix.core")
local code, _, body = t("/ai",
ngx.HTTP_POST,
[[{
"messages": [
{ "role": "system", "content": "You are a mathematician" },
{ "role": "user", "content": "What is 1+1?" }
]
}]],
nil,
{
["test-type"] = "options",
["Content-Type"] = "application/json",
}
)
assert(code == 200, "first request should be successful")
assert(core.string.find(body, "gpt-4"),
"first request should be handled by higher priority instance")
local code, _, body = t("/ai",
ngx.HTTP_POST,
[[{
"messages": [
{ "role": "system", "content": "You are a mathematician" },
{ "role": "user", "content": "What is 1+1?" }
]
}]],
nil,
{
["test-type"] = "options",
["Content-Type"] = "application/json",
}
)
assert(code == 200, "second request should be successful")
assert(core.string.find(body, "gpt-3"),
"second request should be handled by lower priority instance")
local code, body = t("/ai",
ngx.HTTP_POST,
[[{
"messages": [
{ "role": "system", "content": "You are a mathematician" },
{ "role": "user", "content": "What is 1+1?" }
]
}]],
nil,
{
["test-type"] = "options",
["Content-Type"] = "application/json",
}
)
assert(code == 503, "third request should be failed")
assert(core.string.find(body, "all servers tried"), "all servers tried")
ngx.say("passed")
}
}
--- response_body
passed
=== TEST 15: limiting to only one instance
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/ai",
"plugins": {
"ai-proxy-multi": {
"fallback_strategy": "instance_health_and_rate_limiting",
"instances": [
{
"name": "openai-gpt4",
"provider": "openai",
"weight": 1,
"priority": 1,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "gpt-4"
},
"override": {
"endpoint": "http://localhost:16724"
}
},
{
"name": "openai-gpt3",
"provider": "openai",
"weight": 1,
"priority": 0,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "gpt-3"
},
"override": {
"endpoint": "http://localhost:16724"
}
}
],
"ssl_verify": false
},
"ai-rate-limiting": {
"instances": [
{
"name": "openai-gpt4",
"limit": 20,
"time_window": 60
}
]
}
},
"upstream": {
"type": "roundrobin",
"nodes": {
"canbeanything.com": 1
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 16: 10 requests, 8 should be handled by gpt-3, 2 should be handled by gpt-4
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local core = require("apisix.core")
local instances_count = {}
for i = 1, 10 do
local code, _, body = t("/ai",
ngx.HTTP_POST,
[[{
"messages": [
{ "role": "system", "content": "You are a mathematician" },
{ "role": "user", "content": "What is 1+1?" }
]
}]],
nil,
{
["test-type"] = "options",
["Content-Type"] = "application/json",
}
)
assert(code == 200, "first request should be successful")
if core.string.find(body, "gpt-4") then
instances_count["gpt-4"] = (instances_count["gpt-4"] or 0) + 1
else
instances_count["gpt-3"] = (instances_count["gpt-3"] or 0) + 1
end
end
ngx.log(ngx.INFO, "instances_count test:", core.json.delay_encode(instances_count))
assert(instances_count["gpt-4"] <= 2, "gpt-4 should be handled by higher priority instance")
assert(instances_count["gpt-3"] >= 8, "gpt-3 should be handled by lower priority instance")
ngx.say("passed")
}
}
--- response_body
passed
=== TEST 17: each instance uses different current limiting
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/ai",
"plugins": {
"ai-proxy-multi": {
"fallback_strategy": "instance_health_and_rate_limiting",
"instances": [
{
"name": "openai-gpt4",
"provider": "openai",
"weight": 1,
"priority": 1,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "gpt-4"
},
"override": {
"endpoint": "http://localhost:16724"
}
},
{
"name": "openai-gpt3",
"provider": "openai",
"weight": 1,
"priority": 0,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "gpt-3"
},
"override": {
"endpoint": "http://localhost:16724"
}
}
],
"ssl_verify": false
},
"ai-rate-limiting": {
"instances": [
{
"name": "openai-gpt3",
"limit": 50,
"time_window": 60
},
{
"name": "openai-gpt4",
"limit": 20,
"time_window": 60
}
]
}
},
"upstream": {
"type": "roundrobin",
"nodes": {
"canbeanything.com": 1
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 18: gpt3 allows 5 requests, gpt4 allows 2 requests
--- pipelined_requests eval
[
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 200, 200, 200, 503, 503]
=== TEST 19: set limit & instances
--- config
location /t {
content_by_lua_block {
local t = require("lib.test_admin").test
local code, body = t('/apisix/admin/routes/1',
ngx.HTTP_PUT,
[[{
"uri": "/ai",
"plugins": {
"ai-proxy-multi": {
"fallback_strategy": "instance_health_and_rate_limiting",
"instances": [
{
"name": "openai-gpt4",
"provider": "openai",
"weight": 1,
"priority": 1,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "gpt-4"
},
"override": {
"endpoint": "http://localhost:16724"
}
},
{
"name": "openai-gpt3",
"provider": "openai",
"weight": 1,
"priority": 0,
"auth": {
"header": {
"Authorization": "Bearer token"
}
},
"options": {
"model": "gpt-3"
},
"override": {
"endpoint": "http://localhost:16724"
}
}
],
"ssl_verify": false
},
"ai-rate-limiting": {
"limit": 20,
"time_window": 60,
"instances": [
{
"name": "openai-gpt3",
"limit": 50,
"time_window": 60
}
]
}
},
"upstream": {
"type": "roundrobin",
"nodes": {
"canbeanything.com": 1
}
}
}]]
)
if code >= 300 then
ngx.status = code
end
ngx.say(body)
}
}
--- response_body
passed
=== TEST 20: gpt3 allows 5 requests, gpt4 allows 2 requests
--- pipelined_requests eval
[
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
"POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 200, 200, 200, 503, 503]