infrastructure/arm/xml/lbaoai.xml (138 lines of code) (raw):

<policies>
    <inbound>
        <base />
        <!--
            Priority-based routing over a list of backends kept in the cache under the key listBackends.
            The inbound section loads (or creates) that list and prepares authentication; the backend
            section picks a backend, forwards the request, and retries on another backend after 429 or 5xx.
        -->
        <!-- Load the backend list from the cache into the listBackends variable. -->
        <cache-lookup-value key="listBackends" variable-name="listBackends" />
        <choose>
            <!-- The variable is absent after the lookup: build the initial list and cache it for 60 seconds. -->
            <when condition="@(context.Variables.ContainsKey("listBackends") == false)">
                <set-variable name="listBackends" value="@{
                    /* Each entry carries:
                         url          : backend base URL (the routing step appends /openai)
                         priority     : the selection step prefers the lowest value
                         isThrottling : set to true after the backend answered 429 or 5xx
                         retryAfter   : time from which the backend is considered usable again */
                    JArray backends = new JArray();
                    backends.Add(new JObject()
                    {
                        { "url", "{{backend1}}" },
                        { "priority", 1},
                        { "isThrottling", false },
                        { "retryAfter", DateTime.MinValue }
                    });
                    backends.Add(new JObject()
                    {
                        { "url", "{{backend2}}" },
                        { "priority", 1},
                        { "isThrottling", false },
                        { "retryAfter", DateTime.MinValue }
                    });
                    return backends;
                }" />
                <cache-store-value key="listBackends" value="@((JArray)context.Variables["listBackends"])" duration="60" />
            </when>
        </choose>
        <!-- Obtain a managed identity token for the backend resource and send it as the bearer token. -->
        <authentication-managed-identity resource="https://cognitiveservices.azure.com" output-token-variable-name="msi-access-token" ignore-error="false" />
        <set-header name="Authorization" exists-action="override">
            <value>@("Bearer " + (string)context.Variables["msi-access-token"])</value>
        </set-header>
        <!-- Initial routing state; both variables are overwritten inside the retry block below. -->
        <set-variable name="backendIndex" value="-1" />
        <set-variable name="remainingBackends" value="1" />
    </inbound>
    <backend>
        <!--
            The steps inside are repeated while the response is 429 or 5xx and remainingBackends is
            greater than zero, up to 50 retries with no delay between attempts.
        -->
        <retry condition="@(context.Response != null && (context.Response.StatusCode == 429 || context.Response.StatusCode >= 500) && ((Int32)context.Variables["remainingBackends"]) > 0)" count="50" interval="0">
            <!-- Step 1: clear the throttling flag of every backend whose retryAfter time has passed, then cache the list. -->
            <set-variable name="listBackends" value="@{
                JArray backends = (JArray)context.Variables["listBackends"];

                for (int i = 0; i < backends.Count; i++)
                {
                    JObject backend = (JObject)backends[i];

                    if (backend.Value<bool>("isThrottling") && DateTime.Now >= backend.Value<DateTime>("retryAfter"))
                    {
                        backend["isThrottling"] = false;
                        backend["retryAfter"] = DateTime.MinValue;
                    }
                }

                return backends;
            }" />
            <cache-store-value key="listBackends" value="@((JArray)context.Variables["listBackends"])" duration="60" />
            <!-- Step 2: choose the index of the backend to call. -->
            <set-variable name="backendIndex" value="@{
                JArray backends = (JArray)context.Variables["listBackends"];

                /* Collect the indexes of all non-throttling backends that share the lowest priority value. */
                int selectedPriority = Int32.MaxValue;
                List<int> availableBackends = new List<int>();

                for (int i = 0; i < backends.Count; i++)
                {
                    JObject backend = (JObject)backends[i];

                    if (!backend.Value<bool>("isThrottling"))
                    {
                        int backendPriority = backend.Value<int>("priority");

                        if (backendPriority < selectedPriority)
                        {
                            /* A better priority was found: restart the candidate list from this backend. */
                            selectedPriority = backendPriority;
                            availableBackends.Clear();
                            availableBackends.Add(i);
                        }
                        else if (backendPriority == selectedPriority)
                        {
                            availableBackends.Add(i);
                        }
                    }
                }

                if (availableBackends.Count == 1)
                {
                    return availableBackends[0];
                }

                if (availableBackends.Count > 0)
                {
                    /* Returns a random backend from the list if we have more than one available with the same priority */
                    return availableBackends[new Random().Next(0, availableBackends.Count)];
                }
                else
                {
                    /* If there are no available backends, the request will be sent to the first one */
                    return 0;
                }
            }" />
            <!-- Step 3: route to the chosen backend. The request body is buffered so it can be sent again on a retry. -->
            <set-variable name="backendUrl" value="@(((JObject)((JArray)context.Variables["listBackends"])[(Int32)context.Variables["backendIndex"]]).Value<string>("url") + "/openai")" />
            <set-backend-service base-url="@((string)context.Variables["backendUrl"])" />
            <forward-request buffer-request-body="true" />
            <choose>
                <!-- Step 4: on 429 or 5xx, mark the backend that was just called as throttling and recount the usable ones. -->
                <when condition="@(context.Response != null && (context.Response.StatusCode == 429 || context.Response.StatusCode >= 500) )">
                    <!-- Re-read the cached list before changing it. -->
                    <cache-lookup-value key="listBackends" variable-name="listBackends" />
                    <set-variable name="listBackends" value="@{
                        JArray backends = (JArray)context.Variables["listBackends"];
                        int currentBackendIndex = context.Variables.GetValueOrDefault<int>("backendIndex");

                        /* Cool-down in seconds: the first of these headers that holds a non-negative
                           integer wins; when none does, 10 seconds is used.
                           TryParse is used instead of Convert.ToInt32 so that a value that is not a
                           plain integer (for example an HTTP-date Retry-After) is skipped rather than
                           raising an exception that would abort the failover. */
                        int retryAfter = -1;
                        string[] retryHeaders = new string[] { "Retry-After", "x-ratelimit-reset-requests", "x-ratelimit-reset-tokens" };

                        foreach (string headerName in retryHeaders)
                        {
                            int parsedSeconds;
                            if (int.TryParse(context.Response.Headers.GetValueOrDefault(headerName, ""), out parsedSeconds) && parsedSeconds >= 0)
                            {
                                retryAfter = parsedSeconds;
                                break;
                            }
                        }

                        if (retryAfter < 0)
                        {
                            retryAfter = 10;
                        }

                        JObject backend = (JObject)backends[currentBackendIndex];
                        backend["isThrottling"] = true;
                        backend["retryAfter"] = DateTime.Now.AddSeconds(retryAfter);

                        return backends;
                    }" />
                    <cache-store-value key="listBackends" value="@((JArray)context.Variables["listBackends"])" duration="60" />
                    <!-- Count the backends that are not throttling; the retry condition stops at zero. -->
                    <set-variable name="remainingBackends" value="@{
                        JArray backends = (JArray)context.Variables["listBackends"];

                        int remainingBackends = 0;

                        for (int i = 0; i < backends.Count; i++)
                        {
                            JObject backend = (JObject)backends[i];

                            if (!backend.Value<bool>("isThrottling"))
                            {
                                remainingBackends++;
                            }
                        }

                        return remainingBackends;
                    }" />
                </when>
            </choose>
        </retry>
    </backend>
    <outbound>
        <base />
        <!-- Report the backend URL that served the request (or none) in a response header. -->
        <set-header name="x-openai-backendurl" exists-action="override">
            <value>@(context.Variables.GetValueOrDefault<string>("backendUrl", "none"))</value>
        </set-header>
    </outbound>
    <on-error>
        <base />
    </on-error>
</policies>