func()

in plugins/wasm-go/extensions/ai-proxy/provider/qwen.go [312:419]


// buildChatCompletionStreamingResponse converts one Qwen streaming event into
// zero or more chat-completion chunk responses.
//
// Only the first choice of qwenResponse is considered. When
// incrementalStreaming is true, the message carried by the event is forwarded
// as the delta. Otherwise the message previously stored in ctx under
// ctxKeyPushedMessage is stripped, as a prefix, from the content, reasoning
// content and tool calls of the current message to obtain the delta, and the
// current message is stored back under that key for the next event.
//
// The returned slice holds, in order: a content delta chunk (if non-empty), a
// tool-call delta chunk (if non-empty) and, once the choice reports a finish
// reason, a finish chunk followed by a usage chunk. It returns nil when the
// event has no choices, or when (in non-incremental mode) a tool call has no
// arguments yet and the choice is not finished.
func (m *qwenProvider) buildChatCompletionStreamingResponse(ctx wrapper.HttpContext, qwenResponse *qwenTextGenResponse, incrementalStreaming bool) []*chatCompletionResponse {
	// An event without choices carries nothing to forward; indexing
	// Choices[0] below would panic on it.
	if qwenResponse == nil || len(qwenResponse.Output.Choices) == 0 {
		return nil
	}

	baseMessage := chatCompletionResponse{
		Id:                qwenResponse.RequestId,
		Created:           time.Now().Unix(),
		Model:             ctx.GetStringContext(ctxKeyFinalRequestModel, ""),
		Choices:           make([]chatCompletionChoice, 0),
		SystemFingerprint: "",
		Object:            objectChatCompletionChunk,
	}

	qwenChoice := qwenResponse.Output.Choices[0]
	// Yes, Qwen uses a string "null" as null.
	finished := qwenChoice.FinishReason != "" && qwenChoice.FinishReason != "null"
	message := qwenChoice.Message

	reasoningContentMode := m.config.reasoningContentMode

	// Emitted once per streaming event, so keep it out of the warn level.
	log.Debugf("incrementalStreaming: %v", incrementalStreaming)

	deltaContentMessage := &chatMessage{Role: message.Role, Content: message.Content, ReasoningContent: message.ReasoningContent}
	// The tool calls are copied so that stripping prefixes below does not
	// modify the slice owned by message, which is stored in ctx afterwards.
	deltaToolCallsMessage := &chatMessage{Role: message.Role, ToolCalls: append([]toolCall{}, message.ToolCalls...)}

	if incrementalStreaming {
		deltaContentMessage.handleStreamingReasoningContent(ctx, reasoningContentMode)
	} else {
		for _, tc := range message.ToolCalls {
			if tc.Function.Arguments == "" && !finished {
				// We don't push any tool call until its arguments are available.
				return nil
			}
		}

		// NOTE(review): when no message has been pushed yet (first event),
		// handleStreamingReasoningContent is not invoked on the delta.
		// Confirm that this is intended.
		if pushedMessage, ok := ctx.GetContext(ctxKeyPushedMessage).(qwenMessage); ok {
			switch {
			case message.Content == "":
				// Nothing new in this event: carry the pushed content forward
				// so the message stored below still holds it.
				message.Content = pushedMessage.Content
			case message.IsStringContent():
				deltaContentMessage.Content = util.StripPrefix(deltaContentMessage.StringContent(), pushedMessage.StringContent())
			case strings.HasPrefix(baseMessage.Model, qwenVlModelPrefixName):
				// Use the Qwen multimodal model generation API
				currentContentList, isList := deltaContentMessage.Content.([]qwenVlMessageContent)
				if !isList {
					log.Warnf("unexpected deltaContentMessage content type: %T", deltaContentMessage.Content)
					break
				}
				pushedContentList, isList := pushedMessage.Content.([]qwenVlMessageContent)
				if !isList {
					log.Warnf("unexpected pushedMessage content type: %T", pushedMessage.Content)
					break
				}
				// currentContentList shares its backing array with
				// message.Content. Strip on a copy so that the message
				// stored in ctx below keeps the unstripped text; otherwise
				// the next event would be compared against a delta.
				deltaContentList := make([]qwenVlMessageContent, len(currentContentList))
				copy(deltaContentList, currentContentList)
				for i := range deltaContentList {
					if i >= len(pushedContentList) {
						break
					}
					deltaContentList[i].Text = util.StripPrefix(deltaContentList[i].Text, pushedContentList[i].Text)
				}
				deltaContentMessage.Content = deltaContentList
			}

			if message.ReasoningContent == "" {
				message.ReasoningContent = pushedMessage.ReasoningContent
			} else {
				deltaContentMessage.ReasoningContent = util.StripPrefix(deltaContentMessage.ReasoningContent, pushedMessage.ReasoningContent)
			}
			deltaContentMessage.handleStreamingReasoningContent(ctx, reasoningContentMode)

			if len(deltaToolCallsMessage.ToolCalls) > 0 && pushedMessage.ToolCalls != nil {
				for i, tc := range deltaToolCallsMessage.ToolCalls {
					if i >= len(pushedMessage.ToolCalls) {
						break
					}
					pushedFunction := pushedMessage.ToolCalls[i].Function
					tc.Function.Id = util.StripPrefix(tc.Function.Id, pushedFunction.Id)
					tc.Function.Name = util.StripPrefix(tc.Function.Name, pushedFunction.Name)
					tc.Function.Arguments = util.StripPrefix(tc.Function.Arguments, pushedFunction.Arguments)
					deltaToolCallsMessage.ToolCalls[i] = tc
				}
			}
		}
		ctx.SetContext(ctxKeyPushedMessage, message)
	}

	// At most four chunks are produced: content, tool calls, finish, usage.
	responses := make([]*chatCompletionResponse, 0, 4)

	if !deltaContentMessage.IsEmpty() {
		response := baseMessage
		response.Choices = []chatCompletionChoice{{Delta: deltaContentMessage}}
		responses = append(responses, &response)
	}
	if !deltaToolCallsMessage.IsEmpty() {
		response := baseMessage
		response.Choices = []chatCompletionChoice{{Delta: deltaToolCallsMessage}}
		responses = append(responses, &response)
	}

	if finished {
		finishResponse := baseMessage
		finishResponse.Choices = []chatCompletionChoice{{Delta: &chatMessage{}, FinishReason: qwenChoice.FinishReason}}

		// Token usage is reported in its own trailing chunk with an empty delta.
		usageResponse := baseMessage
		usageResponse.Choices = []chatCompletionChoice{{Delta: &chatMessage{}}}
		usageResponse.Usage = usage{
			PromptTokens:     qwenResponse.Usage.InputTokens,
			CompletionTokens: qwenResponse.Usage.OutputTokens,
			TotalTokens:      qwenResponse.Usage.TotalTokens,
		}

		responses = append(responses, &finishResponse, &usageResponse)
	}

	return responses
}