def _launch_ui()

in model-gallery/deploy/llm/vLLM/webui_client.py


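Builds and launches a Gradio chat interface on top of an OpenAI-compatible client (as served by vLLM): a left column of sampling controls and a system prompt, a chat window on the right, and optional streaming of the model's reply.
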
def _launch_ui(model_name, client, args):
    def _post_process(text):
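        """Escape <think> reasoning tags so the chatbot renders them as visible text rather than HTML."""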
        return text.replace("<think>", "&lt;think&gt;").replace(
            "</think>", "&lt;/think&gt;"
        )

    def _transform_messages(history, max_rounds, apply_max_rounds, system_prompt):
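        """Convert Gradio (query, response) history pairs into OpenAI chat messages,
        optionally truncated to the last `max_rounds` rounds and prefixed with a system prompt."""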
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})

        if not apply_max_rounds:
            begin_index = 0
        else:
            begin_index = max(0, len(history) - max_rounds)

        for i in range(begin_index, len(history)):
            query, response = history[i]
            messages.append({"role": "user", "content": query})
            messages.append({"role": "assistant", "content": response})
        messages.pop()  # pop the None assistant response
        return messages

    def predict(
        _chatbot,
        max_completion_tokens,
        top_p,
        apply_top_p,
        temperature,
        apply_temperature,
        use_stream,
        max_rounds,
        apply_max_rounds,
        system_prompt,
    ):
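        """Send the chat history to the OpenAI-compatible endpoint and yield the updated
        chatbot history, either chunk by chunk (stream) or in one shot."""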
        chat_query = _chatbot[-1][0]
        if len(chat_query) == 0:
            # Ignore empty submissions: drop the placeholder turn and refresh the UI.
            # predict is a generator, so yield the update instead of returning it.
            _chatbot.pop()
            yield _chatbot
            return
        messages = _transform_messages(
            _chatbot, max_rounds, apply_max_rounds, system_prompt
        )
        print(f"Messages: {json.dumps(messages, ensure_ascii=False, indent=2)}")
        gen = client.chat.completions.create(
            messages=messages,
            model=model_name,
            max_completion_tokens=max_completion_tokens,
            top_p=top_p if apply_top_p else NOT_GIVEN,
            temperature=temperature if apply_temperature else NOT_GIVEN,
            stream=use_stream,
        )
        print("Response:", end="")
        if use_stream:
            generated_text = ""
            for chunk in gen:
                # Delta content can be None (e.g. role-only or final chunks); skip those.
                delta = chunk.choices[0].delta.content
                if not delta:
                    continue
                generated_text += _post_process(delta)
                print(delta, end="")
                _chatbot[-1] = (chat_query, generated_text)
                yield _chatbot
        else:
            generated_text = _post_process(gen.choices[0].message.content)
            print(gen.choices[0].message.content, end="")
            _chatbot[-1] = (chat_query, generated_text)
            yield _chatbot
        print()

    def add_text(history, text):
        history = history if history is not None else []
        history.append([text, None])  # [user_query, bot_response]
        return history, None

    def clear_history(history):
        if history:
            history.clear()
        return []

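    # Build the UI: parameter controls in the left column, chat window and input box on the right.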
    with gr.Blocks(analytics_enabled=False, css=css) as demo:
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("""<h2><center>ChatLLM-WebUI</center></h2>""")
                gr.Markdown(f"""<h3><center>{model_name}</center></h3>""")
                with gr.Row():
                    with gr.Column(variant="panel"):
                        model_argument = gr.Accordion("Model Arguments")
                        with model_argument:
                            with gr.Row():
                                max_completion_tokens = gr.Slider(
                                    minimum=10,
                                    maximum=10240,
                                    step=10,
                                    label="max_completion_tokens",
                                    value=512,
                                )
                            with gr.Row():
                                apply_top_p = gr.Checkbox(
                                    label="", value=False, elem_classes="checkbox"
                                )
                                top_p = gr.Slider(
                                    minimum=0.0,
                                    maximum=1.0,
                                    step=0.01,
                                    label="top_p",
                                    value=0,
                                )
                            with gr.Row():
                                apply_temperature = gr.Checkbox(
                                    label="", value=True, elem_classes="checkbox"
                                )
                                temperature = gr.Slider(
                                    minimum=0.0,
                                    maximum=2.0,
                                    step=0.01,
                                    label="temperature",
                                    value=0.7,
                                )

                            with gr.Row():
                                use_stream_chat = gr.Checkbox(
                                    label="use_stream_chat", value=True
                                )

                        with gr.Row():
                            max_rounds = gr.Slider(
                                minimum=1,
                                maximum=100,
                                step=1,
                                label="max_rounds",
                                value=10,
                            )
                            apply_max_rounds = gr.Checkbox(
                                label="", value=True, elem_classes="checkbox"
                            )

                        with gr.Row():
                            system_prompt = gr.Textbox(
                                label="System Prompt",
                                lines=4,
                                value="You are a helpful assistant.",
                            )
                            clear_prompt_btn = gr.Button("Clear Prompt")

            with gr.Column(scale=4):
                chatbot = gr.Chatbot(elem_id="chat-box", show_label=False, height=560)
                with gr.Row():
                    query = gr.Textbox(label="Input", lines=3)

                with gr.Row():
                    submit_btn = gr.Button("submit", elem_id="c_generate")
                    clear_history_btn = gr.Button("clear history")

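        # Wire the submit button: append the user text to the history, then stream predict() output back into the chatbot.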
        submit_btn.click(add_text, [chatbot, query], [chatbot, query]).then(
            predict,
            [
                chatbot,
                max_completion_tokens,
                top_p,
                apply_top_p,
                temperature,
                apply_temperature,
                use_stream_chat,
                max_rounds,
                apply_max_rounds,
                system_prompt,
            ],
            [chatbot],
            show_progress=True,
        )
        clear_history_btn.click(clear_history, [chatbot], [chatbot], show_progress=True)
        clear_prompt_btn.click(lambda: "", None, [system_prompt])

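    # Queue requests so streamed (generator) responses reach the UI, then launch with the CLI options.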
    demo.queue().launch(
        share=args.share,
        inbrowser=args.inbrowser,
        server_port=args.server_port,
        server_name=args.server_name,
    )
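
For context, a minimal sketch of how this function might be invoked from a command-line entry point. The actual wiring is not part of this excerpt; the flag names (--base-url, --api-key, --model) and their defaults below are assumptions, while share, inbrowser, server_port, and server_name are the attributes _launch_ui actually reads from args.

import argparse
from openai import OpenAI


def _main():
    parser = argparse.ArgumentParser(
        description="Gradio WebUI client for an OpenAI-compatible vLLM server"
    )
    parser.add_argument("--base-url", default="http://localhost:8000/v1")  # assumed flag
    parser.add_argument("--api-key", default="EMPTY")  # vLLM ignores the key unless one is configured
    parser.add_argument("--model", default=None)  # assumed flag; falls back to the first served model
    parser.add_argument("--share", action="store_true")
    parser.add_argument("--inbrowser", action="store_true")
    parser.add_argument("--server-port", type=int, default=7860)
    parser.add_argument("--server-name", default="0.0.0.0")
    args = parser.parse_args()

    client = OpenAI(base_url=args.base_url, api_key=args.api_key)
    # If no model name is given, use the first model the endpoint advertises.
    model_name = args.model or client.models.list().data[0].id
    _launch_ui(model_name, client, args)


if __name__ == "__main__":
    _main()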