# model-gallery/deploy/mllm/openai_client.py
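#
# Example client for a multimodal LLM service deployed behind an
# OpenAI-compatible EAS endpoint: it base64-encodes a remote image and a
# remote video and sends each to the chat completions API for inference.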
import base64
import requests
from openai import OpenAI
##### API configuration #####
openai_api_key = "<EAS API KEY>"
openai_api_base = "<EAS API Endpoint>/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
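
# Use the first (typically the only) model served by the endpoint.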
models = client.models.list()
model = models.data[0].id


def encode_base64_content_from_url(content_url: str) -> str:
    """Encode content retrieved from a remote URL to base64 format."""
    with requests.get(content_url) as response:
        response.raise_for_status()
        result = base64.b64encode(response.content).decode("utf-8")
    return result


def infer_image():
    image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/QVQ/demo.png"
    stream = True
    image_base64 = encode_base64_content_from_url(image_url)
    chat_completion_from_base64 = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        # Prompt (Chinese): "Answer in Chinese. What number should go in the box in the image?"
                        "text": "使用中文回答,图中方框处应该是数字多少?",
                    },
                    {
                        "type": "image_url",
                        # The demo image is a PNG, so declare it as such in the data URI.
                        "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                    },
                ],
            }
        ],
        model=model,
        max_completion_tokens=1024,
        stream=stream,
    )
    if stream:
        for chunk in chat_completion_from_base64:
            # The final streamed chunk may carry empty choices or no delta content.
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")
        print()
    else:
        result = chat_completion_from_base64.choices[0].message.content
        print(result)
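

# A minimal variant, not part of the original script: if the deployed service can
# fetch remote URLs itself, the public image URL may be passed directly instead of
# a base64 data URI. Whether this works depends on the serving backend, so treat
# it as an assumption rather than a guaranteed capability of the endpoint.
def infer_image_from_url():
    image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/QVQ/demo.png"
    chat_completion_from_url = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "使用中文回答,图中方框处应该是数字多少?"},
                    # Pass the URL as-is; the server downloads the image itself.
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }
        ],
        model=model,
        max_completion_tokens=1024,
    )
    print(chat_completion_from_url.choices[0].message.content)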


def infer_video():
    video_url = "https://pai-quickstart-predeploy-hangzhou.oss-cn-hangzhou.aliyuncs.com/modelscope/algorithms/ms-swift/video_demo.mp4"
    stream = True
    video_base64 = encode_base64_content_from_url(video_url)
    chat_completion_from_base64 = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": [
                    # Prompt (Chinese): "Please describe the content of the video."
                    {"type": "text", "text": "请描述下视频内容"},
                    {
                        # "video_url" is not a standard OpenAI content type; it is an
                        # extension supported by the deployed multimodal backend.
                        "type": "video_url",
                        "video_url": {"url": f"data:video/mp4;base64,{video_base64}"},
                    },
                ],
            }
        ],
        model=model,
        max_completion_tokens=512,
        stream=stream,
    )
    if stream:
        for chunk in chat_completion_from_base64:
            # The final streamed chunk may carry empty choices or no delta content.
            if chunk.choices and chunk.choices[0].delta.content:
                print(chunk.choices[0].delta.content, end="")
        print()
    else:
        result = chat_completion_from_base64.choices[0].message.content
        print(result)


if __name__ == "__main__":
    infer_image()
    infer_video()