# model-gallery/deploy/llm/BladeLLM/webui_client.py
import json

import gradio as gr
import requests


def _launch_ui(model_name, client, args):
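    """Build and launch a Gradio chat UI against a BladeLLM EAS endpoint.

    `model_name` is shown in the page header and `args` carries the endpoint,
    token, and Gradio launch options; `client` is unused in this function.
    """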
    def _post_process(text):
        # Escape <think> tags so the Chatbot's HTML renderer shows them
        # literally instead of dropping them as unknown elements.
        return text.replace("<think>", "&lt;think&gt;").replace(
            "</think>", "&lt;/think&gt;"
        )
def _transform_messages(history, max_rounds, apply_max_rounds, system_prompt):
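        """Convert Gradio [query, response] pairs into OpenAI-style chat
        messages, keeping only the last `max_rounds` rounds when enabled."""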
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
if not apply_max_rounds:
begin_index = 0
else:
begin_index = max(0, len(history) - max_rounds)
for i in range(begin_index, len(history)):
query, response = history[i]
messages.append({"role": "user", "content": query})
messages.append({"role": "assistant", "content": response})
messages.pop() # pop the None assistant response
return messages
def predict(
_chatbot,
max_tokens,
top_k,
apply_top_k,
top_p,
apply_top_p,
temperature,
apply_temperature,
use_stream,
max_rounds,
apply_max_rounds,
system_prompt,
):
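        """Send the current history to the EAS endpoint and yield the updated
        history, streaming partial responses when `use_stream` is set."""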
chat_query = _chatbot[-1][0]
        if not chat_query:
            # A generator's return value is discarded by Gradio, so yield the
            # popped history before returning.
            _chatbot.pop()
            yield _chatbot
            return
messages = _transform_messages(
_chatbot, max_rounds, apply_max_rounds, system_prompt
)
print(f"Messages: {json.dumps(messages,ensure_ascii=False,indent=2)}")
request = {"messages": messages, "stream": use_stream, "max_tokens": max_tokens}
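        # Forward only the sampling parameters the user has toggled on, so the
        # server-side defaults apply to the rest.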
if apply_temperature:
request["temperature"] = temperature
if apply_top_k:
request["top_k"] = top_k
if apply_top_p:
request["top_p"] = top_p
response = requests.post(
url=f"{args.eas_endpoint}/v1/chat/completions",
headers={
"Content-Type": "application/json",
"Authorization": args.eas_token,
},
json=request,
stream=use_stream,
)
print("Response:", end="")
if use_stream:
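            # The endpoint streams OpenAI-style SSE lines: `data: {json}`
            # chunks terminated by `data: [DONE]`.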
generated_text = ""
            for chunk in response.iter_lines(chunk_size=8192, decode_unicode=False):
                if not chunk:
                    continue  # skip SSE keep-alive blank lines
                msg = chunk.decode("utf-8")
                if msg.startswith("data:"):
                    info = msg[len("data:"):].strip()
                    if info == "[DONE]":
                        break
                    resp = json.loads(info)
                    # Role-only and finish chunks may omit "content" in the delta.
                    delta = resp["choices"][0]["delta"].get("content", "")
                    generated_text += _post_process(delta)
                    print(delta, end="", flush=True)
                    _chatbot[-1] = (chat_query, generated_text)
                    yield _chatbot
else:
            resp = response.json()
generated_text = _post_process(resp["choices"][0]["message"]["content"])
print(generated_text, end="")
_chatbot[-1] = (chat_query, generated_text)
yield _chatbot
print()
def add_text(history, text):
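        """Append the user's query with a None placeholder for the reply."""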
history = history if history is not None else []
history.append([text, None]) # [user_query, bot_response]
return history, None
def clear_history(history):
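        """Clear the chat history in place and reset the Chatbot display."""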
if history:
history.clear()
return []
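    # `css` is expected to be defined at module level (not shown in this excerpt).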
with gr.Blocks(analytics_enabled=False, css=css) as demo:
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("""<h2><center>ChatLLM-WebUI</center></h2>""")
if model_name:
gr.Markdown(f"""<h3><center>{model_name}</center></h3>""")
with gr.Row():
with gr.Column(variant="panel"):
                with gr.Accordion("Model Arguments"):
with gr.Row():
max_tokens = gr.Slider(
minimum=10,
maximum=10240,
step=10,
label="max_tokens",
value=512,
)
with gr.Row():
apply_top_k = gr.Checkbox(
label="", value=True, elem_classes="checkbox"
)
top_k = gr.Slider(
minimum=0,
maximum=100,
step=1,
label="top_k",
value=10,
)
with gr.Row():
apply_top_p = gr.Checkbox(
label="", value=False, elem_classes="checkbox"
)
top_p = gr.Slider(
minimum=0.0,
maximum=1.0,
step=0.01,
label="top_p",
value=0,
)
with gr.Row():
apply_temperature = gr.Checkbox(
label="", value=True, elem_classes="checkbox"
)
temperature = gr.Slider(
minimum=0.0,
maximum=2.0,
step=0.01,
label="temperature",
value=0.7,
)
with gr.Row():
use_stream_chat = gr.Checkbox(
label="use_stream_chat", value=True
)
with gr.Row():
max_rounds = gr.Slider(
minimum=1,
maximum=100,
step=1,
label="max_rounds",
value=10,
)
apply_max_rounds = gr.Checkbox(
label="", value=True, elem_classes="checkbox"
)
with gr.Row():
system_prompt = gr.Textbox(
label="System Prompt",
lines=4,
value="You are a helpful assistant.",
)
clear_prompt_btn = gr.Button("Clear Prompt")
with gr.Column(scale=4):
chatbot = gr.Chatbot(elem_id="chat-box", show_label=False, height=560)
with gr.Row():
query = gr.Textbox(label="Input", lines=3)
with gr.Row():
                    submit_btn = gr.Button("Submit", elem_id="c_generate")
                    clear_history_btn = gr.Button("Clear History")
submit_btn.click(add_text, [chatbot, query], [chatbot, query]).then(
predict,
[
chatbot,
max_tokens,
top_k,
apply_top_k,
top_p,
apply_top_p,
temperature,
apply_temperature,
use_stream_chat,
max_rounds,
apply_max_rounds,
system_prompt,
],
[chatbot],
show_progress=True,
)
clear_history_btn.click(clear_history, [chatbot], [chatbot], show_progress=True)
clear_prompt_btn.click(lambda: "", None, [system_prompt])
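    # queue() is needed so the streaming generator in `predict` can push
    # incremental updates to the Chatbot.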
demo.queue().launch(
share=args.share,
inbrowser=args.inbrowser,
server_port=args.server_port,
server_name=args.server_name,
)