def _get_complexity_tests()

in evals/elsuite/function_deduction/eval.py [0:0]


    def _get_complexity_tests(self, metrics):
        solved = [x["complexity"] for x in metrics if x["success"]]
        not_solved = [x["complexity"] for x in metrics if not x["success"]]
        result = {
            "solved_avg_complexity": sum(solved) / len(solved) if solved else None,
            "not_solved_avg_complexity": sum(not_solved) / len(not_solved) if not_solved else None,
        }

        #   This tests if solved have lower complexity than non-solved
        if solved and not_solved:
            _, p_value = scipy.stats.mannwhitneyu(solved, not_solved, alternative="less")
        else:
            p_value = None
        result["solved_or_not_mann_whitney_u_p_value"] = p_value

        #   TODO: add more complexity-related metrics, such as correlation or linear regression coefficient.
        #         Leaving this for the future because we might want to change how the complexity is calculated,
        #         or generally improve the concept somehow.

        return result