in perfkitbenchmarker/linux_benchmarks/mlperf_inference_benchmark.py [0:0]
def Prepare(bm_spec: benchmark_spec.BenchmarkSpec) -> None:
"""Installs and sets up MLPerf Inference on the target vm.
Args:
bm_spec: The benchmark specification
Raises:
errors.Config.InvalidValue upon both GPUs and TPUs appear in the config
"""
vm = bm_spec.vms[0]
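# Clone the published MLPerf inference results repository for the pinned
# submission round.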
repository = f'inference_results_{MLPERF_INFERENCE_VERSION}'
vm.RemoteCommand(f'git clone https://github.com/mlcommons/{repository}.git')
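# Patch the NVIDIA Makefile to detect the architecture with `uname -m`
# (machine hardware name); `uname -p` can report 'unknown' on some distros.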
makefile = f'{repository}/closed/NVIDIA/Makefile'
vm_util.ReplaceText(vm, 'shell uname -p', 'shell uname -m', makefile)
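# Push PKB's custom config for this benchmark/scenario into the repository,
# along with the list file that registers custom configs.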
benchmark = FLAGS.mlperf_benchmark
custom_config = _CUSTOM_CONFIG.format(
benchmark=benchmark, scenario=_SCENARIOS.value.lower()
)
custom_config_path = posixpath.join(
repository,
_CUSTOM_CONFIG_PATH.format(
benchmark=benchmark, scenario=_SCENARIOS.value
),
)
vm.PushDataFile(custom_config, custom_config_path)
custom_config_list_path = posixpath.join(repository, _CUSTOM_CONFIG_LIST_PATH)
vm.PushDataFile(_CUSTOM_CONFIG_LIST, custom_config_list_path)
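# Glob of the NVIDIA per-benchmark config files; the scenario's QPS attribute
# and the GPU batch size can be overridden in place below.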
config = (
f'{repository}/closed/NVIDIA/configs/{benchmark}/{_SCENARIOS.value}/*.py'
)
if _SCENARIOS.value == SERVER:
bm_spec.metric = _SERVER_QPS
elif _SCENARIOS.value == OFFLINE:
bm_spec.metric = _OFFLINE_QPS
if _TARGET_QPS.value:
vm_util.ReplaceText(
vm,
f'{bm_spec.metric} = .*',
f'{bm_spec.metric} = {_TARGET_QPS.value}',
config,
)
if _BATCH_SIZE.value:
vm_util.ReplaceText(
vm,
'gpu_batch_size = .*',
f'gpu_batch_size = {_BATCH_SIZE.value}',
config,
)
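# GPU runs need the CUDA toolkit, the NVIDIA driver, and nvidia-docker on the
# host.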
if nvidia_driver.CheckNvidiaGpuExists(vm):
vm.Install('cuda_toolkit')
vm.Install('nvidia_driver')
vm.Install('nvidia_docker')
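# Every make invocation below exports the scratch path and runs from the
# repository's closed/NVIDIA directory.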
bm_spec.env_cmd = (
f'export MLPERF_SCRATCH_PATH={_MLPERF_SCRATCH_PATH} && '
f'cd {repository}/closed/NVIDIA'
)
docker.AddUser(vm)
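# Build the benchmark container, add the current user inside it, then clean
# and link the scratch directories from within the container.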
vm.RobustRemoteCommand(
f'{bm_spec.env_cmd} && '
'make build_docker NO_BUILD=1 && '
'make docker_add_user && '
'make launch_docker DOCKER_COMMAND="make clean" && '
'make launch_docker DOCKER_COMMAND="make link_dirs"'
)
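# Stage the data, model, and preprocessed data for the selected benchmark.
# DLRM and BERT artifacts come from preprovisioned cloud storage.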
if benchmark == mlperf_benchmark.DLRM:
# Download data
data_dir = posixpath.join(_MLPERF_SCRATCH_PATH, 'data', _DLRM_DATA_MODULE)
# day_23.gz is 13.9 GB. Set timeout to 1 hour.
vm.DownloadPreprovisionedData(data_dir, _DLRM_DATA_MODULE, _DLRM_DATA, 3600)
vm.RemoteCommand(f'cd {data_dir} && gzip -d {_DLRM_DATA}')
# Download model
model_dir = posixpath.join(_MLPERF_SCRATCH_PATH, 'models', benchmark)
vm.DownloadPreprovisionedData(model_dir, benchmark, _DLRM_MODEL)
vm.RemoteCommand(
f'cd {model_dir} && tar -zxvf {_DLRM_MODEL} && rm -f {_DLRM_MODEL}'
)
# tb00_40M.pt is 89.5 GB. Set timeout to 4 hours.
vm.DownloadPreprovisionedData(model_dir, benchmark, _DLRM_ROW_FREQ, 14400)
# Preprocess data
preprocessed_data_dir = posixpath.join(
_MLPERF_SCRATCH_PATH, 'preprocessed_data', _DLRM_DATA_MODULE
)
# full_recalib.tar.gz is 7.9 GB. Set timeout to 1 hour.
vm.DownloadPreprovisionedData(
preprocessed_data_dir, _DLRM_DATA_MODULE, _DLRM_PREPROCESSED_DATA, 3600
)
vm.RemoteCommand(
f'cd {preprocessed_data_dir} && '
f'tar -zxvf {_DLRM_PREPROCESSED_DATA} && '
f'rm -f {_DLRM_PREPROCESSED_DATA}'
)
elif benchmark == mlperf_benchmark.BERT:
# Download data
data_dir = posixpath.join(_MLPERF_SCRATCH_PATH, 'data', 'squad')
vm.DownloadPreprovisionedData(data_dir, benchmark, 'dev-v1.1.json')
# Download model
model_dir = posixpath.join(_MLPERF_SCRATCH_PATH, 'models', benchmark)
vm.DownloadPreprovisionedData(model_dir, benchmark, 'bert_large_v1_1.onnx')
vm.DownloadPreprovisionedData(
model_dir, benchmark, 'bert_large_v1_1_fake_quant.onnx'
)
vm.DownloadPreprovisionedData(model_dir, benchmark, 'vocab.txt')
# Preprocess data
preprocessed_data_dir = posixpath.join(
_MLPERF_SCRATCH_PATH, 'preprocessed_data', 'squad_tokenized'
)
vm.DownloadPreprovisionedData(
preprocessed_data_dir, benchmark, 'input_ids.npy'
)
vm.DownloadPreprovisionedData(
preprocessed_data_dir, benchmark, 'input_mask.npy'
)
vm.DownloadPreprovisionedData(
preprocessed_data_dir, benchmark, 'segment_ids.npy'
)
else:
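# Other benchmarks download and preprocess their data and models with the
# repository's own make targets, run inside the container.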
vm.RobustRemoteCommand(
f'{bm_spec.env_cmd} && '
'make launch_docker DOCKER_COMMAND='
f'"make download_data BENCHMARKS={benchmark}"'
)
vm.RobustRemoteCommand(
f'{bm_spec.env_cmd} && '
'make launch_docker DOCKER_COMMAND='
f'"make download_model BENCHMARKS={benchmark}"'
)
vm.RobustRemoteCommand(
f'{bm_spec.env_cmd} && '
'make launch_docker DOCKER_COMMAND='
f'"make preprocess_data BENCHMARKS={benchmark}"'
)
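# Build the harness inside the container, then do an abbreviated (--fast) run
# of the selected benchmark and scenario, presumably to validate the setup
# before the measured run.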
vm.RobustRemoteCommand(
f'{bm_spec.env_cmd} && '
'make launch_docker DOCKER_COMMAND='
'"make build" && '
'make launch_docker DOCKER_COMMAND='
'"make run RUN_ARGS=\''
f'--benchmarks={FLAGS.mlperf_benchmark} '
f'--scenarios={_SCENARIOS.value} --fast\'"'
)