def analyze_runs()

in parallel_eval/proctor.py [0:0]


    def analyze_runs(self):
        """Aggregate every run's result file into one final-results JSON file.

        Reads the JSON document at ``run.output_file`` for each run in
        ``self.runs``, bundles those documents with the proctor settings and
        summary statistics, and writes everything to
        ``{self.output_dir}/{self.proctor_id}-final-results.json``.

        A run counts as a win when its ``"result"`` field equals ``"win"``;
        any other value counts as a loss. The hop count of a winning run is
        ``len(result["steps"]) - 1``; losing runs contribute no hop count.

        Undefined statistics do not abort the report: ``average_hops`` is
        written as ``None`` (JSON ``null``) when no run was won, and
        ``win_rate`` / ``lose_rate`` are written as ``0.0`` when there are no
        runs at all.
        """
        # Load every result first so each file is closed before aggregation.
        run_results = []
        for run in self.runs:
            with open(run.output_file, "r") as f:
                run_results.append(json.load(f))

        # Only winning runs have a meaningful hop count.
        hops_distribution = [
            len(result["steps"]) - 1
            for result in run_results
            if result["result"] == "win"
        ]

        total_runs = len(run_results)
        win_count = len(hops_distribution)
        lose_count = total_runs - win_count

        # Guard the divisions: an all-loss (or empty) batch must still produce
        # a report instead of raising ZeroDivisionError.
        if hops_distribution:
            average_hops = sum(hops_distribution) / len(hops_distribution)
        else:
            average_hops = None

        if total_runs:
            win_rate = win_count / total_runs
            lose_rate = lose_count / total_runs
        else:
            win_rate = 0.0
            lose_rate = 0.0

        final_results = {
            "article_list": self.article_list,
            "num_trials": self.num_trials,
            "num_workers": self.num_workers,
            "max_steps": self.max_steps,
            "agent_settings": self.agent_settings,
            "runs": run_results,
            "hops_distribution": hops_distribution,
            "average_hops": average_hops,
            "win_rate": win_rate,
            "lose_rate": lose_rate,
        }

        with open(f"{self.output_dir}/{self.proctor_id}-final-results.json", "w") as f:
            json.dump(final_results, f, indent=4)