# def Prepare()
# in perfkitbenchmarker/linux_benchmarks/mlperf_inference_benchmark.py [0:0]


def Prepare(bm_spec: benchmark_spec.BenchmarkSpec) -> None:
  """Installs and sets up MLPerf Inference on the target vm.

  Clones the MLCommons inference-results repository, pushes the custom
  benchmark/scenario configuration, builds the NVIDIA docker harness,
  stages the data and models for the selected benchmark, and finally
  builds the inference engines with a fast verification run.

  Args:
    bm_spec: The benchmark specification.
  """
  vm = bm_spec.vms[0]

  repository = f'inference_results_{MLPERF_INFERENCE_VERSION}'
  vm.RemoteCommand(f'git clone https://github.com/mlcommons/{repository}.git')

  # The NVIDIA Makefile detects the CPU architecture with `uname -p`, which
  # returns "unknown" on some distributions; `uname -m` is reliable.
  makefile = f'{repository}/closed/NVIDIA/Makefile'
  vm_util.ReplaceText(vm, 'shell uname -p', 'shell uname -m', makefile)

  benchmark = FLAGS.mlperf_benchmark

  _PushCustomConfigs(vm, repository, benchmark)
  _ApplyTunableOverrides(vm, bm_spec, repository, benchmark)

  if nvidia_driver.CheckNvidiaGpuExists(vm):
    vm.Install('cuda_toolkit')
    vm.Install('nvidia_driver')
    vm.Install('nvidia_docker')

  # All subsequent make invocations need the scratch path exported and must
  # run from the NVIDIA submission directory.
  bm_spec.env_cmd = (
      f'export MLPERF_SCRATCH_PATH={_MLPERF_SCRATCH_PATH} && '
      f'cd {repository}/closed/NVIDIA'
  )
  docker.AddUser(vm)
  vm.RobustRemoteCommand(
      f'{bm_spec.env_cmd} && '
      'make build_docker NO_BUILD=1 && '
      'make docker_add_user && '
      'make launch_docker DOCKER_COMMAND="make clean" && '
      'make launch_docker DOCKER_COMMAND="make link_dirs"'
  )

  # DLRM and BERT artifacts come from preprovisioned storage; every other
  # benchmark downloads and preprocesses inside the container.
  if benchmark == mlperf_benchmark.DLRM:
    _StageDlrmArtifacts(vm, benchmark)
  elif benchmark == mlperf_benchmark.BERT:
    _StageBertArtifacts(vm, benchmark)
  else:
    _StageArtifactsViaDocker(vm, bm_spec, benchmark)

  # Build the engines, then do a fast run to verify the setup end to end.
  vm.RobustRemoteCommand(
      f'{bm_spec.env_cmd} && '
      'make launch_docker DOCKER_COMMAND='
      '"make build" && '
      'make launch_docker DOCKER_COMMAND='
      '"make run RUN_ARGS=\''
      f'--benchmarks={FLAGS.mlperf_benchmark} '
      f'--scenarios={_SCENARIOS.value} --fast\'"'
  )


def _PushCustomConfigs(vm, repository: str, benchmark: str) -> None:
  """Pushes the rendered custom config and the config list onto the vm."""
  custom_config = _CUSTOM_CONFIG.format(
      benchmark=benchmark, scenario=_SCENARIOS.value.lower()
  )
  custom_config_path = posixpath.join(
      repository,
      _CUSTOM_CONFIG_PATH.format(
          benchmark=benchmark, scenario=_SCENARIOS.value
      ),
  )
  vm.PushDataFile(custom_config, custom_config_path)

  custom_config_list_path = posixpath.join(repository, _CUSTOM_CONFIG_LIST_PATH)
  vm.PushDataFile(_CUSTOM_CONFIG_LIST, custom_config_list_path)


def _ApplyTunableOverrides(vm, bm_spec, repository: str, benchmark: str) -> None:
  """Records the scenario QPS metric name and applies flag-driven overrides.

  Sets `bm_spec.metric` for the Server/Offline scenarios, then rewrites the
  target QPS and/or GPU batch size in the benchmark's scenario configs when
  the corresponding flags are set.
  """
  config = (
      f'{repository}/closed/NVIDIA/configs/{benchmark}/{_SCENARIOS.value}/*.py'
  )
  if _SCENARIOS.value == SERVER:
    bm_spec.metric = _SERVER_QPS
  elif _SCENARIOS.value == OFFLINE:
    bm_spec.metric = _OFFLINE_QPS
  if _TARGET_QPS.value:
    # NOTE(review): assumes the scenario is Server or Offline so that
    # bm_spec.metric is set above — confirm other scenarios are rejected
    # upstream.
    vm_util.ReplaceText(
        vm,
        f'{bm_spec.metric} = .*',
        f'{bm_spec.metric} = {_TARGET_QPS.value}',
        config,
    )

  if _BATCH_SIZE.value:
    vm_util.ReplaceText(
        vm,
        'gpu_batch_size = .*',
        f'gpu_batch_size = {_BATCH_SIZE.value}',
        config,
    )


def _StageDlrmArtifacts(vm, benchmark: str) -> None:
  """Downloads and unpacks DLRM data, model, and preprocessed data."""
  # Download data. day_23.gz is 13.9 GB. Set timeout to 1 hour.
  data_dir = posixpath.join(_MLPERF_SCRATCH_PATH, 'data', _DLRM_DATA_MODULE)
  vm.DownloadPreprovisionedData(data_dir, _DLRM_DATA_MODULE, _DLRM_DATA, 3600)
  vm.RemoteCommand(f'cd {data_dir} && gzip -d {_DLRM_DATA}')

  # Download model.
  model_dir = posixpath.join(_MLPERF_SCRATCH_PATH, 'models', benchmark)
  vm.DownloadPreprovisionedData(model_dir, benchmark, _DLRM_MODEL)
  vm.RemoteCommand(
      f'cd {model_dir} && tar -zxvf {_DLRM_MODEL} && rm -f {_DLRM_MODEL}'
  )
  # tb00_40M.pt is 89.5 GB. Set timeout to 4 hours.
  vm.DownloadPreprovisionedData(model_dir, benchmark, _DLRM_ROW_FREQ, 14400)

  # Preprocess data. full_recalib.tar.gz is 7.9 GB. Set timeout to 1 hour.
  preprocessed_data_dir = posixpath.join(
      _MLPERF_SCRATCH_PATH, 'preprocessed_data', _DLRM_DATA_MODULE
  )
  vm.DownloadPreprovisionedData(
      preprocessed_data_dir, _DLRM_DATA_MODULE, _DLRM_PREPROCESSED_DATA, 3600
  )
  vm.RemoteCommand(
      f'cd {preprocessed_data_dir} && '
      f'tar -zxvf {_DLRM_PREPROCESSED_DATA} && '
      f'rm -f {_DLRM_PREPROCESSED_DATA}'
  )


def _StageBertArtifacts(vm, benchmark: str) -> None:
  """Downloads BERT SQuAD data, models, and tokenized inputs."""
  # Download data.
  data_dir = posixpath.join(_MLPERF_SCRATCH_PATH, 'data', 'squad')
  vm.DownloadPreprovisionedData(data_dir, benchmark, 'dev-v1.1.json')

  # Download model.
  model_dir = posixpath.join(_MLPERF_SCRATCH_PATH, 'models', benchmark)
  vm.DownloadPreprovisionedData(model_dir, benchmark, 'bert_large_v1_1.onnx')
  vm.DownloadPreprovisionedData(
      model_dir, benchmark, 'bert_large_v1_1_fake_quant.onnx'
  )
  vm.DownloadPreprovisionedData(model_dir, benchmark, 'vocab.txt')

  # Preprocess data.
  preprocessed_data_dir = posixpath.join(
      _MLPERF_SCRATCH_PATH, 'preprocessed_data', 'squad_tokenized'
  )
  vm.DownloadPreprovisionedData(
      preprocessed_data_dir, benchmark, 'input_ids.npy'
  )
  vm.DownloadPreprovisionedData(
      preprocessed_data_dir, benchmark, 'input_mask.npy'
  )
  vm.DownloadPreprovisionedData(
      preprocessed_data_dir, benchmark, 'segment_ids.npy'
  )


def _StageArtifactsViaDocker(vm, bm_spec, benchmark: str) -> None:
  """Downloads and preprocesses benchmark artifacts inside the container."""
  for target in ('download_data', 'download_model', 'preprocess_data'):
    vm.RobustRemoteCommand(
        f'{bm_spec.env_cmd} && '
        'make launch_docker DOCKER_COMMAND='
        f'"make {target} BENCHMARKS={benchmark}"'
    )