run_swelancer.py (35 lines of code) (raw):

"""Command-line entry point that runs the SWELancer evaluation via nanoeval.

Optionally restricts the run to the issue IDs passed with ``--issue_ids``;
with no IDs given, ``taskset`` is passed as ``None``.
"""

from __future__ import annotations

# Load environment before importing anything else: the call below must stay
# ahead of the swelancer / nanoeval imports.
from dotenv import load_dotenv

load_dotenv()

from swelancer import SWELancerEval
import argparse
import nanoeval
from nanoeval.evaluation import EvalSpec, RunnerArgs
from nanoeval.recorder import dummy_recorder
from nanoeval.setup import nanoeval_entrypoint
from swelancer_agent import SimpleAgentSolver


def parse_args() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    The only option is ``--issue_ids``, which accepts zero or more string
    values (``nargs='*'``).
    """
    cli = argparse.ArgumentParser(description='Run SWELancer evaluation')
    cli.add_argument(
        '--issue_ids',
        nargs='*',
        type=str,
        help='List of ISSUE_IDs to evaluate. If not specified, all issues will be evaluated.',
    )
    return cli.parse_args()


async def main() -> None:
    """Build the evaluation spec from the CLI arguments, run it, print the report."""
    cli_args = parse_args()

    # A missing or empty --issue_ids list is normalised to None.
    # taskset is a list of ISSUE_IDs you wish to evaluate (e.g., ["123", "456_789"])
    selected_ids = cli_args.issue_ids or None

    solver = SimpleAgentSolver(model="gpt-4o")
    evaluation = SWELancerEval(solver=solver, taskset=selected_ids)
    runner_args = RunnerArgs(
        concurrency=25,
        experimental_use_multiprocessing=True,
        enable_slackbot=False,
        recorder=dummy_recorder(),
        max_retries=5,
    )
    spec = EvalSpec(eval=evaluation, runner=runner_args)

    report = await nanoeval.run(spec)
    print(report)


if __name__ == "__main__":
    # nanoeval_entrypoint receives the coroutine object produced by main().
    nanoeval_entrypoint(main())