#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
use t::APISIX 'no_plan';

log_level("info");
repeat_each(1);
no_long_string();
no_root_location();

# Slurp the canned ai-proxy response so the suite fails fast when the asset
# is missing.  NOTE(review): $resp is not interpolated anywhere below —
# kept only as an existence check; confirm before removing entirely.
my $resp_file = 't/assets/ai-proxy-response.json';
open(my $fh, '<', $resp_file) or die "Could not open file '$resp_file' $!";
my $resp = do { local $/; <$fh> };
close($fh);

add_block_preprocessor(sub {
    my ($block) = @_;

    if (!defined $block->request) {
        $block->set_value("request", "GET /t");
    }

    # Mock OpenAI-compatible upstream on 127.0.0.1:16724.
    # /v1/chat/completions answers with a fixed completion whose usage is
    # prompt=8, completion=5, total=10 tokens — the rate-limit expectations
    # in the tests below are derived from these numbers.
    my $http_config = $block->http_config // <<_EOC_;
        server {
            server_name openai;
            listen 16724;

            default_type 'application/json';

            location /anything {
                content_by_lua_block {
                    local json = require("cjson.safe")

                    if ngx.req.get_method() ~= "POST" then
                        ngx.status = 400
                        ngx.say("Unsupported request method: ", ngx.req.get_method())
                    end
                    ngx.req.read_body()
                    local body = ngx.req.get_body_data()

                    if body ~= "SELECT * FROM STUDENTS" then
                        ngx.status = 503
                        ngx.say("passthrough doesn't work")
                        return
                    end
                    ngx.say('{"foo", "bar"}')
                }
            }

            location /v1/chat/completions {
                content_by_lua_block {
                    local json = require("cjson.safe")

                    if ngx.req.get_method() ~= "POST" then
                        ngx.status = 400
                        ngx.say("Unsupported request method: ", ngx.req.get_method())
                    end
                    ngx.req.read_body()
                    local body, err = ngx.req.get_body_data()
                    body, err = json.decode(body)

                    local test_type = ngx.req.get_headers()["test-type"]
                    if test_type == "options" then
                        if body.foo == "bar" then
                            ngx.status = 200
                            ngx.say("options works")
                        else
                            ngx.status = 500
                            ngx.say("model options feature doesn't work")
                        end
                        return
                    end

                    local header_auth = ngx.req.get_headers()["authorization"]
                    local query_auth = ngx.req.get_uri_args()["apikey"]

                    if header_auth ~= "Bearer token" and query_auth ~= "apikey" then
                        ngx.status = 401
                        ngx.say("Unauthorized")
                        return
                    end

                    if header_auth == "Bearer token" or query_auth == "apikey" then
                        ngx.req.read_body()
                        local body, err = ngx.req.get_body_data()
                        body, err = json.decode(body)

                        if not body.messages or #body.messages < 1 then
                            ngx.status = 400
                            ngx.say([[{ "error": "bad request"}]])
                            return
                        end

                        if body.messages[1].content == "write an SQL query to get all rows from student table" then
                            ngx.print("SELECT * FROM STUDENTS")
                            return
                        end

                        ngx.status = 200
                        -- keep the "message" object on a single line: the
                        -- response_body regexes in TEST 5 match it literally
                        ngx.say(string.format([[
{
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "message": { "content": "1 + 1 = 2.", "role": "assistant" }
        }
    ],
    "created": 1723780938,
    "id": "chatcmpl-9wiSIg5LYrrpxwsr2PubSQnbtod1P",
    "model": "%s",
    "object": "chat.completion",
    "system_fingerprint": "fp_abc28019ad",
    "usage": {
        "completion_tokens": 5,
        "prompt_tokens": 8,
        "total_tokens": 10
    }
}
]], body.model))
                        return
                    end

                    ngx.status = 503
                    ngx.say("reached the end of the test suite")
                }
            }

            location /random {
                content_by_lua_block {
                    ngx.say("path override works")
                }
            }
        }
_EOC_

    $block->set_value("http_config", $http_config);
});

run_tests();

__DATA__

=== TEST 1: sanity
--- config
    location /t {
        content_by_lua_block {
            local configs = {
                {
                    time_window = 60,
                },
                {
                    limit = 30,
                },
                {
                    limit = 30,
                    time_window = 60,
                    rejected_code = 199,
                },
                {
                    limit = 30,
                    time_window = 60,
                    limit_strategy = "invalid",
                },
                {
                    limit = 30,
                    time_window = 60,
                    instances = {
                        {
                            name = "instance1",
                            limit = 30,
                            time_window = 60,
                        },
                        {
                            limit = 30,
                            time_window = 60,
                        }
                    },
                },
                {
                    time_window = 60,
                    instances = {
                        {
                            name = "instance1",
                            limit = 30,
                            time_window = 60,
                        }
                    },
                },
                {
                    limit = 30,
                    instances = {
                        {
                            name = "instance1",
                            limit = 30,
                            time_window = 60,
                        }
                    },
                },
                {
                    instances = {
                        {
                            name = "instance1",
                            limit = 30,
                            time_window = 60,
                        }
                    },
                },
                {
                    limit = 30,
                    time_window = 60,
                    rejected_code = 403,
                    rejected_msg = "rate limit exceeded",
                    limit_strategy = "completion_tokens",
                },
                {
                    limit = 30,
                    time_window = 60,
                    instances = {
                        {
                            name = "instance1",
                            limit = 30,
                            time_window = 60,
                        }
                    },
                }
            }
            local core = require("apisix.core")
            local plugin = require("apisix.plugins.ai-rate-limiting")
            for _, config in ipairs(configs) do
                local ok, err = plugin.check_schema(config)
                if not ok then
                    ngx.say(err)
                else
                    ngx.say("passed")
                end
            end
            ngx.say("done")
        }
    }
--- response_body
property "limit" is required when "time_window" is set
property "time_window" is required when "limit" is set
property "rejected_code" validation failed: expected 199 to be at least 200
property "limit_strategy" validation failed: matches none of the enum values
property "instances" validation failed: failed to validate item 2: property "name" is required
property "limit" is required when "time_window" is set
property "time_window" is required when "limit" is set
passed
passed
passed
done



=== TEST 2: set route 1, default limit_strategy: total_tokens
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy": {
                            "provider": "openai",
                            "auth": {
                                "header": {
                                    "Authorization": "Bearer token"
                                }
                            },
                            "options": {
                                "model": "gpt-35-turbo-instruct",
                                "max_tokens": 512,
                                "temperature": 1.0
                            },
                            "override": {
                                "endpoint": "http://localhost:16724"
                            },
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "limit": 30,
                            "time_window": 60
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed



=== TEST 3: reject the 4th request
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 503]



=== TEST 4: set rejected_code to 403, rejected_msg to "rate limit exceeded"
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy": {
                            "provider": "openai",
                            "auth": {
                                "header": {
                                    "Authorization": "Bearer token"
                                }
                            },
                            "options": {
                                "model": "gpt-35-turbo-instruct",
                                "max_tokens": 512,
                                "temperature": 1.0
                            },
                            "override": {
                                "endpoint": "http://localhost:16724"
                            },
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "limit": 30,
                            "time_window": 60,
                            "rejected_code": 403,
                            "rejected_msg": "rate limit exceeded"
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed



=== TEST 5: check code and message
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 403]
--- response_body eval
[
    qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
    qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
    qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
    qr/\{"error_msg":"rate limit exceeded"\}/,
]



=== TEST 6: check rate limit headers
--- request
POST /ai
{ "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] }
--- more_headers
Authorization: Bearer token
--- response_headers
X-AI-RateLimit-Limit-ai-proxy: 30
X-AI-RateLimit-Remaining-ai-proxy: 29
X-AI-RateLimit-Reset-ai-proxy: 60



=== TEST 7: check rate limit headers after 4 requests
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 403]
--- response_headers eval
[
    "X-AI-RateLimit-Remaining-ai-proxy: 29",
    "X-AI-RateLimit-Remaining-ai-proxy: 19",
    "X-AI-RateLimit-Remaining-ai-proxy: 9",
    "X-AI-RateLimit-Remaining-ai-proxy: 0",
]



=== TEST 8: set route2 with limit_strategy: completion_tokens
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/2',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai2",
                    "plugins": {
                        "ai-proxy": {
                            "provider": "openai",
                            "auth": {
                                "header": {
                                    "Authorization": "Bearer token"
                                }
                            },
                            "options": {
                                "model": "gpt-35-turbo-instruct",
                                "max_tokens": 512,
                                "temperature": 1.0
                            },
                            "override": {
                                "endpoint": "http://localhost:16724"
                            },
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "limit": 20,
                            "time_window": 45,
                            "limit_strategy": "completion_tokens"
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed



=== TEST 9: reject the 5th request
--- pipelined_requests eval
[
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 503]



=== TEST 10: check rate limit headers
--- request
POST /ai2
{ "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] }
--- more_headers
Authorization: Bearer token
--- response_headers
X-AI-RateLimit-Limit-ai-proxy: 20
X-AI-RateLimit-Remaining-ai-proxy: 19
X-AI-RateLimit-Reset-ai-proxy: 45



=== TEST 11: multi-request
--- pipelined_requests eval
[
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 503]
--- response_headers eval
[
    "X-AI-RateLimit-Remaining-ai-proxy: 19",
    "X-AI-RateLimit-Remaining-ai-proxy: 14",
    "X-AI-RateLimit-Remaining-ai-proxy: 9",
    "X-AI-RateLimit-Remaining-ai-proxy: 4",
    "X-AI-RateLimit-Remaining-ai-proxy: 0",
]



=== TEST 12: request route 1 and route 2
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 200, 200, 200, 403, 503]



=== TEST 13: ai-rate-limiting & ai-proxy-multi, with instance_health_and_rate_limiting strategy
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy-multi": {
                            "fallback_strategy": "instance_health_and_rate_limiting",
                            "instances": [
                                {
                                    "name": "openai-gpt4",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 1,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-4"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                },
                                {
                                    "name": "openai-gpt3",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 0,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-3"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                }
                            ],
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "limit": 10,
                            "time_window": 60
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed



=== TEST 14: fallback strategy should works
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local core = require("apisix.core")

            local code, _, body = t("/ai", ngx.HTTP_POST, [[{
                "messages": [
                    { "role": "system", "content": "You are a mathematician" },
                    { "role": "user", "content": "What is 1+1?" }
                ]
            }]], nil, {
                ["test-type"] = "options",
                ["Content-Type"] = "application/json",
            })
            assert(code == 200, "first request should be successful")
            assert(core.string.find(body, "gpt-4"),
                "first request should be handled by higher priority instance")

            local code, _, body = t("/ai", ngx.HTTP_POST, [[{
                "messages": [
                    { "role": "system", "content": "You are a mathematician" },
                    { "role": "user", "content": "What is 1+1?" }
                ]
            }]], nil, {
                ["test-type"] = "options",
                ["Content-Type"] = "application/json",
            })
            assert(code == 200, "second request should be successful")
            assert(core.string.find(body, "gpt-3"),
                "second request should be handled by lower priority instance")

            local code, body = t("/ai", ngx.HTTP_POST, [[{
                "messages": [
                    { "role": "system", "content": "You are a mathematician" },
                    { "role": "user", "content": "What is 1+1?" }
                ]
            }]], nil, {
                ["test-type"] = "options",
                ["Content-Type"] = "application/json",
            })
            assert(code == 503, "third request should be failed")
            assert(core.string.find(body, "all servers tried"), "all servers tried")

            ngx.say("passed")
        }
    }
--- response_body
passed



=== TEST 15: limiting to only one instance
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy-multi": {
                            "fallback_strategy": "instance_health_and_rate_limiting",
                            "instances": [
                                {
                                    "name": "openai-gpt4",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 1,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-4"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                },
                                {
                                    "name": "openai-gpt3",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 0,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-3"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                }
                            ],
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "instances": [
                                {
                                    "name": "openai-gpt4",
                                    "limit": 20,
                                    "time_window": 60
                                }
                            ]
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed



=== TEST 16: 10 requests, 8 should be handled by gpt-3, 2 should be handled by gpt-4
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local core = require("apisix.core")

            local instances_count = {}
            for i = 1, 10 do
                local code, _, body = t("/ai", ngx.HTTP_POST, [[{
                    "messages": [
                        { "role": "system", "content": "You are a mathematician" },
                        { "role": "user", "content": "What is 1+1?" }
                    ]
                }]], nil, {
                    ["test-type"] = "options",
                    ["Content-Type"] = "application/json",
                })
                assert(code == 200, "first request should be successful")
                if core.string.find(body, "gpt-4") then
                    instances_count["gpt-4"] = (instances_count["gpt-4"] or 0) + 1
                else
                    instances_count["gpt-3"] = (instances_count["gpt-3"] or 0) + 1
                end
            end

            ngx.log(ngx.INFO, "instances_count test:", core.json.delay_encode(instances_count))
            assert(instances_count["gpt-4"] <= 2, "gpt-4 should be handled by higher priority instance")
            assert(instances_count["gpt-3"] >= 8, "gpt-3 should be handled by lower priority instance")
            ngx.say("passed")
        }
    }
--- response_body
passed



=== TEST 17: each instance uses different current limiting
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy-multi": {
                            "fallback_strategy": "instance_health_and_rate_limiting",
                            "instances": [
                                {
                                    "name": "openai-gpt4",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 1,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-4"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                },
                                {
                                    "name": "openai-gpt3",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 0,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-3"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                }
                            ],
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "instances": [
                                {
                                    "name": "openai-gpt3",
                                    "limit": 50,
                                    "time_window": 60
                                },
                                {
                                    "name": "openai-gpt4",
                                    "limit": 20,
                                    "time_window": 60
                                }
                            ]
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed



=== TEST 18: gpt3 allows 5 requests, gpt4 allows 2 requests
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 200, 200, 200, 503, 503]



=== TEST 19: set limit & instances
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy-multi": {
                            "fallback_strategy": "instance_health_and_rate_limiting",
                            "instances": [
                                {
                                    "name": "openai-gpt4",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 1,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-4"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                },
                                {
                                    "name": "openai-gpt3",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 0,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-3"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                }
                            ],
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "limit": 20,
                            "time_window": 60,
                            "instances": [
                                {
                                    "name": "openai-gpt3",
                                    "limit": 50,
                                    "time_window": 60
                                }
                            ]
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed



=== TEST 20: gpt3 allows 5 requests, gpt4 allows 2 requests
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 200, 200, 200, 503, 503]