def analyze_results()

in ember/benchmark.py


    def analyze_results(self, results: List[Dict]) -> Dict:
        """Analyze benchmark results."""
        successful_requests = [r for r in results if r["success"]]
        failed_requests = [r for r in results if not r["success"]]
        
        if not successful_requests:
            return {"error": "No successful requests to analyze"}
            
        durations = [r["duration"] for r in successful_requests]
        total_duration = max(r["duration"] for r in results)  # Time from start to last completion
        
        return {
            "total_requests": len(results),
            "successful_requests": len(successful_requests),
            "failed_requests": len(failed_requests),
            "documents_per_request": self.config.max_batch_size,
            "total_documents_processed": len(successful_requests) * self.config.max_batch_size,
            "average_duration": statistics.mean(durations),
            "median_duration": statistics.median(durations),
            "p95_duration": sorted(durations)[int(len(durations) * 0.95)],
            "min_duration": min(durations),
            "max_duration": max(durations),
            "std_dev": statistics.stdev(durations) if len(durations) > 1 else 0,
            "total_duration": total_duration,
            "requests_per_second": len(successful_requests) / total_duration,
            "documents_per_second": (len(successful_requests) * self.config.max_batch_size) / total_duration
        }
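
The method relies on module-level `import statistics` and `from typing import Dict, List` in ember/benchmark.py. Below is a minimal sketch of exercising the analysis in isolation with synthetic results; the class name `Benchmark`, the stubbed `config.max_batch_size` attribute, and the shape of the result dicts are assumptions inferred from the fields read above, not a confirmed ember API.

    import random
    from types import SimpleNamespace

    from ember.benchmark import Benchmark  # class name assumed

    # Synthetic results shaped like the dicts analyze_results reads:
    # a "success" flag and a per-request "duration" in seconds.
    results = (
        [{"success": True, "duration": random.uniform(0.5, 2.0)} for _ in range(95)]
        + [{"success": False, "duration": 0.0} for _ in range(5)]
    )

    # Stub only the attributes analyze_results touches (config.max_batch_size),
    # so the analysis can run without constructing a full benchmark instance.
    stub = SimpleNamespace(config=SimpleNamespace(max_batch_size=8))
    stats = Benchmark.analyze_results(stub, results)

    print(f"p95 latency: {stats['p95_duration']:.2f}s")
    print(f"throughput:  {stats['documents_per_second']:.1f} docs/s")

Passing a `SimpleNamespace` as `self` works because the method only reads `self.config.max_batch_size`; it keeps the example free of whatever setup the real benchmark class requires.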