# main() — from 5-4o_fine_tuning/eval.py

def main():
    """Entry point: run the static-analysis eval over the whole dataset.

    Parses CLI options, downloads the eval dataset, fans per-file checks
    out across a process pool, then writes a log file and prints the
    aggregate pass rate.
    """
    arg_parser = argparse.ArgumentParser(
        description="Run static analysis eval")
    arg_parser.add_argument("--model", type=str,
                            default="gpt-4o-mini",
                            help="OpenAI model name (base or fine-tuned)")
    arg_parser.add_argument("--n_shot", type=int, default=0,
                            help="# of examples for few-shot")
    arg_parser.add_argument("--use_similarity", action="store_true",
                            help="Enable similarity-based retrieval of dataset examples")
    opts = arg_parser.parse_args()

    # Fetch the benchmark split and keep only the fields the workers need.
    dataset = load_dataset("patched-codes/static-analysis-eval",
                           split="train", download_mode='force_redownload')
    records = [
        {"file_name": row["file_name"], "source": row["source"], "cwe": row["cwe"]}
        for row in dataset
    ]
    total_tests = len(records)

    # A manager-backed list lets worker processes report files they fixed.
    fixed_files = multiprocessing.Manager().list()
    worker = partial(process_file,
                     fixed_files=fixed_files,
                     model_name=opts.model,
                     n_shot=opts.n_shot,
                     use_similarity=opts.use_similarity)

    # Leave a couple of cores free for the parent process / OS.
    worker_count = max(1, multiprocessing.cpu_count() - 2)
    with multiprocessing.Pool(processes=worker_count) as pool:
        results = list(tqdm(pool.imap(worker, records), total=total_tests))

    # Aggregate the per-file pass/fail results into a percentage score.
    passing_tests = sum(results)
    score = passing_tests / total_tests * 100
    sanitized_model_name = f"{clean_filename(opts.model)}-{opts.n_shot}-shot" + (
        "-sim" if opts.use_similarity else "")
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
    log_file_name = f"{sanitized_model_name}_{timestamp}.log"

    create_log_file(log_file_name=log_file_name, model_name=opts.model, score=score,
                    passing_tests=passing_tests,
                    total_tests=total_tests,
                    fixed_files=fixed_files,
                    n_shot=opts.n_shot,
                    use_similarity=opts.use_similarity)

    print(
        f"Results for static analysis eval: {score:.2f}%\nLog file with results: {log_file_name}")