def get_completion()

in tools/aoai.py [0:0]


    def get_completion(self, prompt, image_base64=None, max_tokens=800, retry_after=True):
        """
        Generates a completion for the given prompt using the Azure OpenAI service.

        The prompt is first passed through ``self._truncate_input`` with
        ``self.max_gpt_model_input_tokens`` as the limit. When ``image_base64``
        is provided, the user message is sent as a two-part content list (text
        plus a ``data:image/jpeg;base64,...`` image URL); otherwise it is sent
        as a plain string.

        On a rate-limit error the method sleeps for the number of milliseconds
        given by the ``retry-after-ms`` response header and retries exactly
        once, with the same prompt, image and ``max_tokens``.

        Args:
            prompt (str): The input prompt for the model.
            image_base64 (str, optional): Base64 encoded image to be included with the prompt. Defaults to None.
            max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 800.
            retry_after (bool, optional): Flag to determine if the method should retry after rate limiting. Defaults to True.

        Returns:
            str: The generated completion (content of the first choice's message).

        Raises:
            RateLimitError: If the service rate-limits the call and either
                ``retry_after`` is False or no ``retry-after-ms`` header was
                returned.
            ClientAuthenticationError: If authentication fails.
            Exception: Any other error is logged and re-raised unchanged.
        """
        one_liner_prompt = prompt.replace('\n', ' ')
        logging.debug(f"[aoai]{self.document_filename} Getting completion for prompt: {one_liner_prompt[:100]}")

        # Truncate prompt if needed
        prompt = self._truncate_input(prompt, self.max_gpt_model_input_tokens)

        # Build the request payload up front so the try block only covers the
        # service call and response handling.
        if image_base64:
            user_content = [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
                },
            ]
        else:
            user_content = prompt

        input_messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": user_content},
        ]

        try:
            response = self.client.chat.completions.create(
                messages=input_messages,
                model=self.openai_gpt_deployment,
                temperature=0.7,
                top_p=0.95,
                max_tokens=max_tokens
            )

            completion = response.choices[0].message.content
            logging.debug(f"[aoai]{self.document_filename} Completion received successfully.")
            return completion

        except RateLimitError as e:
            if not retry_after:
                logging.error(f"[aoai]{self.document_filename} get_completion: Rate limit error occurred after retries: {e}")
                raise

            retry_after_ms = e.response.headers.get('retry-after-ms')
            if not retry_after_ms:
                logging.error(f"[aoai]{self.document_filename} get_completion: Rate limit error occurred, no 'retry-after-ms' provided: {e}")
                raise

            retry_after_ms = int(retry_after_ms)
            logging.info(f"[aoai]{self.document_filename} get_completion: Reached rate limit, retrying after {retry_after_ms} ms")
            time.sleep(retry_after_ms / 1000)
            # Forward the image as well: dropping it here would silently turn a
            # multimodal request into a text-only one on retry.
            return self.get_completion(
                prompt,
                image_base64=image_base64,
                max_tokens=max_tokens,
                retry_after=False,
            )

        except ClientAuthenticationError as e:
            logging.error(f"[aoai]{self.document_filename} get_completion: Authentication failed: {e}")
            raise

        except Exception as e:
            logging.error(f"[aoai]{self.document_filename} get_completion: An unexpected error occurred: {e}")
            raise