scripts/download_dataset.py (18 lines of code) (raw):

# Pre-populates a local cache with the SWE-bench_Lite dataset: first the
# "test" split via `datasets.load_dataset`, then a snapshot of the dataset
# repository via `huggingface_hub.snapshot_download`.
import os

from datasets import load_dataset
from huggingface_hub import snapshot_download

# Hugging Face Hub identifier of the dataset; used by both download calls.
repo_id = "princeton-nlp/SWE-bench_Lite"

# Root directory for everything this script downloads.
# NOTE(review): "%teamcity.build.workingDir%" looks like a TeamCity parameter
# reference that is presumably substituted before the script runs — confirm
# against the build configuration. If it is not substituted, the literal text
# is used as a relative directory name.
cache_dir = '%teamcity.build.workingDir%/dataset_cache'
os.makedirs(cache_dir, exist_ok=True)  # no error if the directory already exists

# Step 1: load the "test" split, using cache_dir as the datasets cache.
print("Downloading SWE-bench_Lite dataset...")
dataset = load_dataset(
    repo_id,
    split="test",
    cache_dir=cache_dir,
)
print(f"Dataset cached to: {cache_dir}")

# Step 2: fetch a snapshot of the dataset repository into a fixed
# sub-directory of the cache.
snapshot_dir = f"{cache_dir}/datasets/{repo_id}"
snapshot_download(
    repo_id=repo_id,
    repo_type="dataset",
    cache_dir=cache_dir,
    local_dir=snapshot_dir,
)