def generate_stream_gate()

in fastchat/serve/huggingface_api_worker.py [0:0]


    def generate_stream_gate(self, params):
        """Stream a completion from the configured inference endpoint.

        Each yielded item is a JSON document encoded to bytes and followed by
        a single NUL byte. The JSON object holds the full text generated so
        far under ``"text"``, ``"error_code"`` 0, and ``"finish_reason"``,
        which is ``None`` while generation is in progress and ``"stop"`` or
        ``"length"`` once the end of generation is known.

        Text that might be the beginning of a stop sequence is held back until
        it is either confirmed (and trimmed) or ruled out. A final chunk is
        always emitted after the stream ends or a stop sequence matches, so
        held-back text and the finish reason are never lost.

        Args:
            params: Request dict. ``params["prompt"]`` is required; the dict
                is also passed to ``get_gen_kwargs`` to build the generation
                options.

        Yields:
            bytes: One encoded chunk per update. If anything raises while
            talking to the endpoint, one chunk containing ``SERVER_ERROR_MSG``
            and ``ErrorCode.INTERNAL_ERROR`` is yielded instead of letting the
            exception propagate.
        """
        self.call_ct += 1

        prompt = params["prompt"]
        gen_kwargs = get_gen_kwargs(params, seed=self.seed)
        # Copy before extending: the list handed back by get_gen_kwargs may be
        # shared with the caller's request, and must not grow across calls.
        stop = list(gen_kwargs["stop_sequences"] or [])
        if "falcon" in self.model_path and "chat" in self.model_path:
            stop.extend(["\nUser:", "<|endoftext|>", " User:", "###"])
            # dict.fromkeys de-duplicates while keeping a deterministic order.
            stop = list(dict.fromkeys(stop))
            gen_kwargs["stop_sequences"] = stop

        logger.info(f"prompt: {prompt}")
        logger.info(f"gen_kwargs: {gen_kwargs}")

        def encode(current_text, finish_reason):
            # Wire format: JSON payload followed by a NUL delimiter.
            payload = {
                "text": current_text,
                "error_code": 0,
                "finish_reason": finish_reason,
            }
            return json.dumps(payload).encode() + b"\0"

        try:
            if self.model_path == "":
                url = f"{self.api_base}"
            else:
                url = f"{self.api_base}/{self.model_path}"
            client = InferenceClient(url, token=self.token)
            res = client.text_generation(
                prompt, stream=True, details=True, **gen_kwargs
            )

            reason = None
            text = ""
            last_sent = None  # (text, reason) of the most recent chunk sent
            for chunk in res:
                # Read the finish reason before skipping special tokens: the
                # end-of-sequence token is typically special and is the chunk
                # that carries the details.
                details = chunk.details
                if details is not None and details.finish_reason is not None:
                    server_reason = details.finish_reason
                    # Clients only ever see "stop" / "length". The endpoint
                    # reports "eos_token" / "stop_sequence" for a natural stop
                    # (per the huggingface_hub docs); fold those into "stop".
                    # `==` / `in` are used so str-based enums compare too.
                    if server_reason == "length":
                        reason = "length"
                    elif server_reason in ("stop", "eos_token", "stop_sequence"):
                        reason = "stop"
                    else:
                        reason = None

                if chunk.token.special:
                    continue
                text += chunk.token.text

                # Ignore empty stop strings: they would match every text and
                # `text[:-0]` would wipe the whole output.
                matched = next((x for x in stop if x and text.endswith(x)), None)
                if matched is not None:
                    text = text[: -len(matched)]
                    reason = "stop"
                    break
                if could_be_stop(text, stop):
                    # Possibly a partial stop sequence; hold the text back
                    # until more tokens settle it.
                    continue

                last_sent = (text, reason)
                yield encode(text, reason)

            # Final flush: covers a stop-sequence match, text held back when
            # the stream ended, a finish reason seen on a special token, and a
            # stream that produced no chunk at all.
            if (text, reason) != last_sent:
                yield encode(text, reason)
        except Exception as e:
            ret = {
                "text": f"{SERVER_ERROR_MSG}\n\n({e})",
                "error_code": ErrorCode.INTERNAL_ERROR,
            }
            yield json.dumps(ret).encode() + b"\0"