func printMarkdownResults()

in k8s-bench/main.go [318:576]


// printMarkdownResults renders results as a Markdown report and emits it.
//
// The report contains, in order: a per-model summary table, overall totals
// with percentages, one detailed table per group, and a timestamped footer.
// When config.IgnoreToolUseShim is set, the summary shows one Success/Fail
// pair per model and the detailed tables are grouped by model; otherwise both
// are broken down by tool-use-shim status ("shim_enabled"/"shim_disabled").
//
// A result counts as a success when its Result string contains "success"
// (case-insensitive); every other result counts as a failure.
//
// If resultsFilePath is non-empty the report is written to that file with
// mode 0644 and a confirmation line is printed to stdout; otherwise the report
// itself is printed to stdout. The only error returned is a wrapped
// os.WriteFile failure.
func printMarkdownResults(config AnalyzeConfig, results []model.TaskResult, resultsFilePath string) error {
	// tally holds the success/fail counts for one subset of the results.
	type tally struct{ success, fail int }
	// cellKey addresses one (model, shim status) cell of the summary table.
	type cellKey struct{ modelID, shim string }

	// isSuccess is the single definition of what counts as a passing result.
	isSuccess := func(raw string) bool {
		return strings.Contains(strings.ToLower(raw), "success")
	}
	// bump returns t with the matching counter incremented.
	bump := func(t tally, ok bool) tally {
		if ok {
			t.success++
		} else {
			t.fail++
		}
		return t
	}
	// sortedKeys returns the keys of m in ascending order so that table rows
	// and sections do not depend on map iteration order.
	sortedKeys := func(m map[string]tally) []string {
		keys := make([]string, 0, len(m))
		for k := range m {
			keys = append(keys, k)
		}
		sort.Strings(keys)
		return keys
	}

	// Classify every result exactly once, collecting all counters and the
	// grouping needed by the detailed section in a single pass.
	var overall tally
	perModel := make(map[string]tally)
	perShim := make(map[string]tally)
	perCell := make(map[cellKey]tally)
	// groups is keyed by model ID or by shim label, depending on the config.
	groups := make(map[string][]model.TaskResult)
	for _, r := range results {
		ok := isSuccess(r.Result)
		modelID := r.LLMConfig.ModelID
		shim := "shim_disabled"
		if r.LLMConfig.EnableToolUseShim {
			shim = "shim_enabled"
		}
		cell := cellKey{modelID: modelID, shim: shim}

		overall = bump(overall, ok)
		perModel[modelID] = bump(perModel[modelID], ok)
		perShim[shim] = bump(perShim[shim], ok)
		perCell[cell] = bump(perCell[cell], ok)

		groupKey := shim
		if config.IgnoreToolUseShim {
			groupKey = modelID
		}
		groups[groupKey] = append(groups[groupKey], r)
	}
	models := sortedKeys(perModel)
	shims := sortedKeys(perShim)
	total := len(results)

	// Writes to a strings.Builder never fail, so the errors returned by
	// fmt.Fprintf below are intentionally not checked.
	var buffer strings.Builder

	buffer.WriteString("# K8s-bench Evaluation Results\n\n")

	// --- Model Performance Summary ---
	buffer.WriteString("## Model Performance Summary\n\n")

	if config.IgnoreToolUseShim {
		// Simplified table: one Success/Fail pair per model.
		buffer.WriteString("| Model | Success | Fail |\n")
		buffer.WriteString("|-------|---------|------|\n")
		for _, modelID := range models {
			t := perModel[modelID]
			fmt.Fprintf(&buffer, "| %s | %d | %d |\n", modelID, t.success, t.fail)
		}
		fmt.Fprintf(&buffer, "| **Total** | %d | %d |\n\n", overall.success, overall.fail)
	} else {
		// One Success/Fail column pair per shim status seen in the results.
		buffer.WriteString("| Model |")
		for _, shim := range shims {
			fmt.Fprintf(&buffer, " %s Success | %s Fail |", shim, shim)
		}
		buffer.WriteString("\n|-------|")
		for range shims {
			buffer.WriteString("------------|-----------|")
		}
		buffer.WriteString("\n")

		for _, modelID := range models {
			fmt.Fprintf(&buffer, "| %s |", modelID)
			for _, shim := range shims {
				// A missing cell yields the zero tally, i.e. "0 | 0".
				t := perCell[cellKey{modelID: modelID, shim: shim}]
				fmt.Fprintf(&buffer, " %d | %d |", t.success, t.fail)
			}
			buffer.WriteString("\n")
		}

		buffer.WriteString("| **Total** |")
		for _, shim := range shims {
			t := perShim[shim]
			fmt.Fprintf(&buffer, " %d | %d |", t.success, t.fail)
		}
		buffer.WriteString("\n\n")
	}

	// --- Overall Summary ---
	buffer.WriteString("## Overall Summary\n\n")
	fmt.Fprintf(&buffer, "- Total Runs: %d\n", total)
	fmt.Fprintf(&buffer, "- Overall Success: %d (%d%%)\n", overall.success, calculatePercentage(overall.success, total))
	fmt.Fprintf(&buffer, "- Overall Fail: %d (%d%%)\n\n", overall.fail, calculatePercentage(overall.fail, total))

	// --- Detailed Results ---
	// resultEmoji returns the marker shown in front of a row's raw result.
	resultEmoji := func(raw string) string {
		if isSuccess(raw) {
			return "✅"
		}
		return "❌"
	}
	// writeGroupSummary appends the bullet list that closes a detailed table.
	// Every result is tallied as either success or fail, so the two counters
	// add up to the number of rows in the group.
	writeGroupSummary := func(title string, t tally) {
		groupTotal := t.success + t.fail
		fmt.Fprintf(&buffer, "\n**%s Summary**\n\n", title)
		fmt.Fprintf(&buffer, "- Total: %d\n", groupTotal)
		fmt.Fprintf(&buffer, "- Success: %d (%d%%)\n", t.success, calculatePercentage(t.success, groupTotal))
		fmt.Fprintf(&buffer, "- Fail: %d (%d%%)\n\n", t.fail, calculatePercentage(t.fail, groupTotal))
	}

	if config.IgnoreToolUseShim {
		for _, modelID := range models {
			rows := groups[modelID]

			fmt.Fprintf(&buffer, "## Model: %s\n\n", modelID)
			buffer.WriteString("| Task | Provider | Result |\n")
			buffer.WriteString("|------|----------|--------|\n")

			// Order rows by task so the table layout is consistent.
			sort.Slice(rows, func(i, j int) bool {
				return rows[i].Task < rows[j].Task
			})
			for _, r := range rows {
				fmt.Fprintf(&buffer, "| %s | %s | %s %s |\n",
					r.Task,
					r.LLMConfig.ProviderID,
					resultEmoji(r.Result), r.Result)
			}

			writeGroupSummary(modelID, perModel[modelID])
		}
	} else {
		for _, shim := range shims {
			rows := groups[shim]

			fmt.Fprintf(&buffer, "## Tool Use: %s\n\n", shim)
			buffer.WriteString("| Task | Provider | Model | Result |\n")
			buffer.WriteString("|------|----------|-------|--------|\n")

			// Order rows by model first, then by task within a model.
			sort.Slice(rows, func(i, j int) bool {
				if rows[i].LLMConfig.ModelID != rows[j].LLMConfig.ModelID {
					return rows[i].LLMConfig.ModelID < rows[j].LLMConfig.ModelID
				}
				return rows[i].Task < rows[j].Task
			})
			for _, r := range rows {
				fmt.Fprintf(&buffer, "| %s | %s | %s | %s %s |\n",
					r.Task,
					r.LLMConfig.ProviderID,
					r.LLMConfig.ModelID,
					resultEmoji(r.Result), r.Result)
			}

			writeGroupSummary(shim, perShim[shim])
		}
	}

	// --- Footer ---
	buffer.WriteString("---\n\n")
	fmt.Fprintf(&buffer, "_Report generated on %s_\n", time.Now().Format("January 2, 2006 at 3:04 PM"))

	output := buffer.String()

	// Write to the file when a path is given; otherwise print to stdout.
	if resultsFilePath != "" {
		if err := os.WriteFile(resultsFilePath, []byte(output), 0644); err != nil {
			return fmt.Errorf("writing to file %q: %w", resultsFilePath, err)
		}
		fmt.Printf("Results written to %s\n", resultsFilePath)
	} else {
		fmt.Print(output)
	}

	return nil
}