def collect_complete_response()

in maga_transformer/openai/renderers/custom_renderer.py


    def collect_complete_response(self, choice_generator):
        """Drain a stream of partial responses and merge the per-choice deltas
        into a single complete ChatCompletionResponse, returned as JSON."""
        all_choices = []
        usage = None
        aux_info = None
        
        def split_think_tag(text: Optional[str]):
            # Split accumulated text on the module-level think_end_tag
            # (e.g. "</think>") at most once: text before the tag is the
            # reasoning content, text after it is the visible content.
            # If the tag is absent, the whole text is visible content.
            if text is None:
                return None, None
            text_results = text.split(think_end_tag, 1)
            reasoning_content = text_results[0] if len(text_results) == 2 else None
            content = text_results[1] if len(text_results) == 2 else text
            return content, reasoning_content
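        # For example, with think_end_tag == "</think>":
        #   split_think_tag("thoughts</think>answer") -> ("answer", "thoughts")
        #   split_think_tag("plain answer")           -> ("plain answer", None)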
        
        for response in choice_generator:
            if len(response.choices) != len(all_choices):
                if not all_choices:
                    # First chunk: seed one accumulated choice per streamed choice.
                    for i, choice in enumerate(response.choices):
                        content, reasoning_content = split_think_tag(choice.delta.content)
                        all_choices.append(ChatCompletionResponseChoice(
                                index=i,
                                message=ChatMessage(
                                    role=choice.delta.role or RoleEnum.assistant,
                                    content=content or None,
                                    reasoning_content=reasoning_content or None,
                                    function_call=choice.delta.function_call or None,
                                ),
                                finish_reason=choice.finish_reason,
                                logprobs=choice.logprobs,
                            )
                        )
                else:
                    raise ValueError(f"response.choices has a different length! "
                                     f"[{response.choices}] vs [{all_choices}].")
            else:
                # Subsequent chunks: append each delta to its accumulated choice,
                # then re-split the accumulated text on the think tag.
                for i in range(len(all_choices)):
                    if all_choices[i].message.content is None:
                        all_choices[i].message.content = (response.choices[i].delta.content or None)
                    else:
                        all_choices[i].message.content += (response.choices[i].delta.content or "")
                    content, reasoning_content = split_think_tag(all_choices[i].message.content)
                    all_choices[i].message.content = content
                    # Keep previously captured reasoning: once the tag has been
                    # consumed, later splits return None for it.
                    if reasoning_content is not None:
                        all_choices[i].message.reasoning_content = reasoning_content
                    all_choices[i].message.role = response.choices[i].delta.role or all_choices[i].message.role
                    all_choices[i].message.function_call = response.choices[i].delta.function_call or all_choices[i].message.function_call
                    all_choices[i].finish_reason = response.choices[i].finish_reason or all_choices[i].finish_reason
                    # Token logprobs are concatenated across chunks.
                    if all_choices[i].logprobs is not None:
                        if response.choices[i].logprobs is not None:
                            all_choices[i].logprobs.content += response.choices[i].logprobs.content
                    else:
                        all_choices[i].logprobs = response.choices[i].logprobs
            usage = response.usage or usage
            aux_info = response.aux_info or aux_info

        if usage is None:
            logging.warning("No usage returned from stream response; using empty values.")
            usage = UsageInfo(
                prompt_tokens=0,
                total_tokens=0,
                completion_tokens=0
            )
        chat_response = ChatCompletionResponse(
            choices=all_choices,
            usage=usage,
            aux_info=aux_info,
            model="AsyncModel",
        )
        return chat_response.model_dump_json(exclude_none=True)
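
A minimal usage sketch (not from the source): it drives the collector with two
hand-built stream chunks. The SimpleNamespace stand-ins below only mimic the
attributes this method reads (choices, delta.content/role/function_call,
finish_reason, logprobs, usage, aux_info); renderer is assumed to be an
instance of the enclosing renderer class, and think_end_tag is assumed to be
"</think>".

    from types import SimpleNamespace

    def make_chunk(text, finish_reason=None, usage=None):
        # One streamed partial response holding a single choice with a content delta.
        delta = SimpleNamespace(content=text, role=None, function_call=None)
        choice = SimpleNamespace(delta=delta, finish_reason=finish_reason, logprobs=None)
        return SimpleNamespace(choices=[choice], usage=usage, aux_info=None)

    chunks = [
        make_chunk("I should greet the user."),
        make_chunk("</think>Hello!", finish_reason="stop"),
    ]
    print(renderer.collect_complete_response(iter(chunks)))
    # -> JSON whose first choice has content="Hello!" and
    #    reasoning_content="I should greet the user."
    # (usage is omitted here, so the collector logs a warning and fills zeros.)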