scenarios/workload-genai/policies/fragments/manage-spikes-with-payg/retry-with-payg.xml (23 lines of code) (raw):

<!--
    Policy fragment: send traffic to the PTU backend by default and spill over to a
    Pay As You Go (PAYG) backend when the PTU backend answers with HTTP 429.

    Usage: include inside the <backend> section of the policy.

    Prerequisites read by this fragment:
      - APIM backends with ids "ptu-backend-1" and "payg-backend-1".
      - Context variable "msi-access-token": must already hold the bearer token
        before this fragment runs.
-->
<fragment>
    <!--
        Retry policy: the child policies run once, then run again for as long as the
        last response status is 429, up to 3 retries.
        Because interval, max-interval and delta are all specified, API Management
        applies its exponential interval algorithm:
            wait = interval + 2^(count - 1) * random(delta * 0.8, delta * 1.2)
        capped at max-interval (interval = 1 s, delta = 2 s, max-interval = 10 s).
    -->
    <retry condition="@(context.Response.StatusCode == 429)" count="3" interval="1" max-interval="10" delta="2">
        <!-- Keep a copy of the request body so it can be re-sent on every attempt. -->
        <set-variable name="body" value="@(context.Request.Body.As<string>(preserveContent: true))" />

        <choose>
            <!--
                Null-conditional access: on the first pass no request has been forwarded
                yet, so a missing response must fall through to the default PTU path
                instead of failing the expression.
            -->
            <when condition="@(context.Response?.StatusCode == 429)">
                <!-- The previous attempt returned 429: switch to the PAYG backend. -->
                <!-- A pool of PAYG backends could be load balanced here instead. -->
                <set-variable name="selected-backend-id" value="payg-backend-1" />
            </when>
            <otherwise>
                <!-- Default path: the PTU backend (multiple PTU backends could be load balanced here). -->
                <set-variable name="selected-backend-id" value="ptu-backend-1" />
            </otherwise>
        </choose>

        <!-- Both paths authenticate the same way, so the header and backend are applied once. -->
        <set-header name="Authorization" exists-action="override">
            <value>@("Bearer " + (string)context.Variables["msi-access-token"])</value>
        </set-header>
        <set-backend-service backend-id="@((string)context.Variables["selected-backend-id"])" />

        <!-- Restore the saved body before forwarding this attempt. -->
        <set-body>@((string)context.Variables["body"])</set-body>

        <!--
            NOTE(review): per the forward-request reference, fail-on-error-status-code="true"
            triggers the on-error section for 400-599 responses. Confirm that this interacts
            with the surrounding retry as intended.
        -->
        <forward-request timeout="120" fail-on-error-status-code="true" buffer-response="false" />
    </retry>
</fragment>