def call_model_api()

in src/models/struxgpt_base.py [0:0]


    def call_model_api(self, prompt: Union[str, list], history=None,
                             batched=False, bs=8, **kwargs) -> Union[str, List[str]]:
        """Send one prompt, or a batch of prompts, to the HTTP model endpoint.

        Args:
            prompt: A single prompt string, or a list of prompt strings when
                ``batched`` is true.
            history: Value sent as the ``"history"`` field of the request. In
                batched mode the same value accompanies every prompt.
            batched: When true, dispatch every prompt through
                ``multiprocess_call`` (each one re-entering this method in
                single-prompt mode) and return its result.
            bs: Forwarded to ``multiprocess_call`` as ``num_threads``.
            **kwargs: Optional per-request overrides:
                ``url`` (defaults to ``self.model_name_or_path``),
                ``temperature`` (0.0), ``maxWindowSize`` (8192),
                ``maxOutputLength`` (128),
                ``system`` ("You are a helpful assistant.") and
                ``timeout`` (handed to ``requests``; the default ``None``
                means wait indefinitely).

        Returns:
            For a single prompt, ``response['data']['output']`` from the
            decoded JSON reply. For a batch, whatever ``multiprocess_call``
            returns (presumably one output per prompt -- confirm against that
            helper), or ``[]`` when the batch is empty.

        Raises:
            TypeError: If ``prompt`` is not a list in batched mode, or not a
                string in single-prompt mode.
            RuntimeError: If the server replies with a status other than 200.
        """
        if batched:
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if not isinstance(prompt, list):
                raise TypeError(
                    f"batched=True expects `prompt` to be a list, got {type(prompt).__name__}"
                )
            if not prompt:
                # Nothing to send; avoid spinning up workers for an empty batch.
                return []
            # ``history`` is a named parameter, so it never appears in
            # ``kwargs``; forward it explicitly or it would be silently lost.
            call_kwargs = dict(kwargs, history=history)
            return multiprocess_call(self.call_model_api,
                                     [(s, ) for s in prompt],
                                     [call_kwargs] * len(prompt),
                                     num_threads=bs)

        if not isinstance(prompt, str):
            raise TypeError(
                f"`prompt` must be a str when batched=False, got {type(prompt).__name__}"
            )

        model_url = kwargs.get('url', self.model_name_or_path)
        headers = {'Content-Type': 'application/json'}
        # Key order is kept stable so the serialized request body does not
        # change from what the service has been receiving.
        input_param = {
            "input": prompt,
            "history": history,
            "modelParams": {
                "best_of": 1,
                "length_penalty": 1.0,
                "temperature": kwargs.get('temperature', 0.0),
            },
            "serviceParams": {
                "promptTemplateName": "default",
                "maxWindowSize": kwargs.get('maxWindowSize', 8192),
                "maxOutputLength": kwargs.get('maxOutputLength', 128),
                "system": kwargs.get('system', "You are a helpful assistant."),
            },
        }
        data = json.dumps(input_param)

        res = requests.request("POST", model_url, headers=headers, data=data,
                               timeout=kwargs.get('timeout'))
        if res.status_code != HTTPStatus.OK:
            raise RuntimeError(f"Connect to server error.\nStatus Code: {res.status_code}\nMessage: {res.reason}")

        response = json.loads(res.text)
        return response['data']['output']