in maga_transformer/openai/renderers/custom_renderer.py [0:0]
def collect_complete_response(self, choice_generator):
    """Drain a stream of chat-completion chunks into one final response.

    Accumulates the per-chunk deltas (content, role, function_call,
    finish_reason, logprobs) of each choice, then splits the accumulated
    content on ``think_end_tag`` exactly once to separate reasoning text
    from the visible answer.

    Args:
        choice_generator: iterable of streamed response objects, each with
            ``choices`` (list of delta-bearing choices), ``usage`` and
            ``aux_info`` attributes.

    Returns:
        JSON string (``model_dump_json(exclude_none=True)``) of the merged
        ``ChatCompletionResponse``.

    Raises:
        ValueError: if the number of choices changes mid-stream.
    """
    all_choices = []
    usage = None
    aux_info = None

    def split_think_tag(text: Optional[str]):
        # Returns (content, reasoning_content). When the tag is absent the
        # whole text is treated as content and reasoning is None.
        if text is None:
            return None, None
        text_results = text.split(think_end_tag, 1)
        if len(text_results) == 2:
            return text_results[1], text_results[0]
        return text, None

    for response in choice_generator:
        if len(response.choices) != len(all_choices):
            if all_choices:
                raise ValueError(f"response.choices has different length! "
                                 f"[{response.choices}] vs [{all_choices}].")
            # First chunk: create one aggregate choice per streamed choice.
            # Content is kept raw here; the think-tag split happens once,
            # after the whole stream is consumed (see below).
            for i, choice in enumerate(response.choices):
                all_choices.append(ChatCompletionResponseChoice(
                    index=i,
                    message=ChatMessage(
                        role=choice.delta.role or RoleEnum.assistant,
                        content=choice.delta.content or None,
                        function_call=choice.delta.function_call or None,
                    ),
                    finish_reason=choice.finish_reason,
                    logprobs=choice.logprobs,
                ))
        else:
            for i, acc in enumerate(all_choices):
                delta = response.choices[i].delta
                # Append raw delta content; distinguish "never produced
                # content" (None) from "produced empty content" ("").
                if acc.message.content is None:
                    acc.message.content = delta.content or None
                else:
                    acc.message.content += delta.content or ""
                acc.message.role = delta.role or acc.message.role
                acc.message.function_call = delta.function_call or acc.message.function_call
                acc.finish_reason = response.choices[i].finish_reason or acc.finish_reason
                if acc.logprobs is not None:
                    if response.choices[i].logprobs is not None:
                        acc.logprobs.content += response.choices[i].logprobs.content
                else:
                    acc.logprobs = response.choices[i].logprobs
        usage = response.usage or usage
        aux_info = response.aux_info or aux_info

    # Split reasoning from content ONCE over the fully accumulated text.
    # Doing this per-chunk (as the previous version did) lost the reasoning
    # content whenever any chunk arrived after the one containing the tag
    # (later splits found no tag and clobbered reasoning with None), and it
    # could never detect a tag arriving split across two chunks.
    for acc in all_choices:
        content, reasoning_content = split_think_tag(acc.message.content)
        acc.message.content = content or None
        acc.message.reasoning_content = reasoning_content or None

    if usage is None:
        logging.warning("No usage returned from stream response. use empty value.")
        usage = UsageInfo(
            prompt_tokens=0,
            total_tokens=0,
            completion_tokens=0
        )
    chat_response = ChatCompletionResponse(
        choices=all_choices,
        usage=usage,
        aux_info=aux_info,
        model="AsyncModel",
    )
    return chat_response.model_dump_json(exclude_none=True)