def get_completion()

in connectors/aoai.py [0:0]


    def get_completion(self, prompt, max_tokens=800, retry_after=True):
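        """Send a chat completion request to the configured Azure OpenAI deployment and return the response text.

        Retries once when the service returns a rate-limit error that carries a 'retry-after-ms' header.
        """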
        one_liner_prompt = prompt.replace('\n', ' ')
        logging.info(f"[aoai] Getting completion for prompt: {one_liner_prompt[:100]}")
        openai_deployment = os.getenv('AZURE_OPENAI_CHATGPT_DEPLOYMENT')

        # truncate prompt if needed
        prompt = self._truncate_input(prompt, MAX_GPT_MODEL_INPUT_TOKENS)

        try:
            input_messages = [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"{prompt}"}
            ]

            response = self.client.chat.completions.create(
                messages=input_messages,
                model=openai_deployment,
                temperature=float(os.environ.get('AZURE_OPENAI_TEMPERATURE', 0.7)),
                top_p=float(os.environ.get('AZURE_OPENAI_TOP_P', 0.95)),
                max_tokens=max_tokens
            )

            completion = response.choices[0].message.content

            return completion

        except RateLimitError as e:
            retry_after_ms = e.response.headers.get('retry-after-ms')
            # retry only if the service supplied a back-off interval and this is the first attempt
            if retry_after_ms and retry_after:
                retry_after_ms = int(retry_after_ms)
                logging.info(f"[aoai] get_completion: Reached rate limit, retrying after {retry_after_ms} ms")
                time.sleep(retry_after_ms / 1000)
                return self.get_completion(prompt, max_tokens=max_tokens, retry_after=False)
            else:
                logging.error(f"[aoai] get_completion: Rate limit error occurred, no 'retry-after-ms' provided: {e}")
                raise

        except Exception as e:
            logging.error(f"[aoai] get_completion: An unexpected error occurred: {e}")
            raise
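
The method relies on module-level imports, the MAX_GPT_MODEL_INPUT_TOKENS constant, and instance attributes (self.client, self._truncate_input) that are not shown in this excerpt. The sketch below is a minimal, hypothetical reconstruction of that surrounding context: the class name AzureOpenAIClient, the endpoint/key/version environment variable names, the constant's value, and the tiktoken-based truncation are all assumptions, not the actual contents of connectors/aoai.py.

    # Hypothetical sketch of the context get_completion() assumes; the real
    # connectors/aoai.py may organize this differently.
    import logging
    import os

    import tiktoken
    from openai import AzureOpenAI, RateLimitError

    # assumed input budget; defined at module level in the real file
    MAX_GPT_MODEL_INPUT_TOKENS = 128000


    class AzureOpenAIClient:  # hypothetical class name
        def __init__(self):
            # Azure OpenAI client configured from environment variables
            # (variable names below are assumptions).
            self.client = AzureOpenAI(
                azure_endpoint=os.environ["AZURE_OPENAI_SERVICE_ENDPOINT"],
                api_key=os.environ["AZURE_OPENAI_API_KEY"],
                api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-02-01"),
            )

        def _truncate_input(self, text, max_tokens):
            # Trim the prompt to the model's input budget; token-based
            # truncation with tiktoken is assumed here.
            encoding = tiktoken.get_encoding("cl100k_base")
            tokens = encoding.encode(text)
            if len(tokens) <= max_tokens:
                return text
            logging.info(f"[aoai] truncating input from {len(tokens)} to {max_tokens} tokens")
            return encoding.decode(tokens[:max_tokens])

        # get_completion (shown above) is a method of this class.

    # Usage (hypothetical):
    #   client = AzureOpenAIClient()
    #   answer = client.get_completion("Summarize the following text: ...", max_tokens=400)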