dags/inference/configs/trt_llm_mlperf_v41_config.py (190 lines of code) (raw):
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities to construct configs for MLPerf4.0 Reproduce DAG."""
import datetime
from typing import Dict, List
from dags.common import test_owner
from xlml.apis import gcp_config, metric_config, task, test_config
from dags.common import vm_resource
from dags.common.vm_resource import GpuVersion, Project, RuntimeVersion
RUNTIME_IMAGE = RuntimeVersion.TPU_UBUNTU2204_BASE.value
GCS_SUBFOLDER_PREFIX = test_owner.Team.INFERENCE.value
def get_trt_llm_mlperf_gpu_config(
machine_type: vm_resource.MachineVersion,
image_project: vm_resource.ImageProject,
image_family: vm_resource.ImageFamily,
accelerator_type: vm_resource.GpuVersion,
count: int,
gpu_zone: vm_resource.Zone,
time_out_in_min: int,
test_name: str,
project: Project,
network: str,
subnetwork: str,
existing_instance_name: str = None,
benchmark_configs: Dict = {},
model_parameters: Dict = {},
parameter_positions: Dict = {},
binary_search_steps: int = 1,
) -> task.GpuCreateResourceTask:
docker_container_name = "mlperf-inference"
set_up_cmds = (
# Install Nvidia driver
"wget -c https://us.download.nvidia.com/tesla/550.54.15/NVIDIA-Linux-x86_64-550.54.15.run",
"chmod u+x NVIDIA-Linux-x86_64-550.54.15.run",
"sudo ./NVIDIA-Linux-x86_64-550.54.15.run -x-module-path=/usr/lib/xorg/modules --ui=none -x-library-path=/usr/lib -q",
"sudo nvidia-smi -pm 1",
# Format and mount multiple Local SSD
"sudo apt update && sudo apt install mdadm --no-install-recommends",
"find /dev/ | grep google-local-nvme-ssd",
'sudo mdadm --create /dev/md0 --level=0 --raid-devices=$(find /dev/ -name "google-local-nvme-ssd*" | wc -l) $(find /dev/ -name "google-local-nvme-ssd*")',
"sudo mdadm --detail --prefer=by-id /dev/md0",
"sudo mkfs.ext4 -F /dev/md0",
"sudo mkdir -p /scratch",
"sudo mount /dev/md0 /scratch",
"sudo chmod a+w /scratch",
"cd /scratch",
# Prepare data
"gsutil -m cp -n -r gs://yijiaj/mlperf/v41/Google_GPU .",
"gsutil -m cp -n -r gs://tohaowu/mlpinf-v40/mlperf_inf_dlrmv2 .",
f"gsutil -m cp -n -r {benchmark_configs['models']} .",
f"gsutil -m cp -n -r {benchmark_configs['preprocessed_data']} .",
f"gsutil -m cp -n -r {benchmark_configs['docker_config']} .",
"curl -sSL https://get.docker.com/ | sh",
"sudo mkdir -p /home/cloud-ml-auto-solutions/.docker",
"sudo touch ~/.docker/config.json",
"sudo cp config.json ~/.docker/config.json",
"sudo chown cloud-ml-auto-solutions:cloud-ml-auto-solutions /home/cloud-ml-auto-solutions",
"sudo chmod a+w /home/cloud-ml-auto-solutions/.docker",
"cd Google_GPU",
"export MLPERF_SCRATCH_PATH=/scratch",
"sed -i '27i\ARCH=x86_64' Makefile",
"sed -i '29i\ARCH=x86_64' Makefile.docker",
"sed -i '29i\ARCH=x86_64' Makefile.const",
"sudo usermod -a -G docker $USER",
# Build and launch a docker container
"PARTNER_DROP=1 make prebuild DOCKER_DETACH=1",
"make docker_add_user",
f"make launch_docker DOCKER_NAME={docker_container_name} DOCKER_ARGS='-v /scratch/mlperf_inf_dlrmv2:/home/mlperf_inf_dlrmv2 -d'",
)
jsonl_output_path = "metric_report.jsonl"
jsonl_converter_py_lines = (
"import sys, json, glob, jsonlines",
"metadata_log_pattern = '/scratch/Google_GPU/build/logs/*/*/*/*/metadata.json'",
"metadata_log_paths = glob.glob(metadata_log_pattern)",
"def convert_to_jsonl(json_path, jsonl_path):",
" data = dict()",
" data['dimensions'] = dict()",
" data['metrics'] = dict()",
" with open(json_path, 'r') as file:",
" metadatadata = json.load(file)",
" for key in metadatadata:",
" try:",
" float(metadatadata[key])",
" data['metrics'][key] = float(metadatadata[key])",
" except:",
" data['dimensions'][key] = metadatadata[key]",
" with jsonlines.open(jsonl_path, 'a') as writer:",
" writer.write(data)",
"if __name__ == '__main__':",
" for metadata_log_path in metadata_log_paths:",
" convert_to_jsonl(metadata_log_path, sys.argv[1])",
)
py_script = "\n".join(jsonl_converter_py_lines)
make_jsonl_converter_cmd = f'echo "{py_script}" > jsonl_converter.py'
model_parameters_sweep_cmds = []
for model_name in benchmark_configs["model_name"].split(","):
scenario = ",".join(model_parameters[model_name])
if accelerator_type == GpuVersion.L4:
model_parameters_sweep_cmds.append(
f"CUDA_VISIBLE_DEVICES=0 make generate_engines RUN_ARGS='--benchmarks={model_name} --scenarios={scenario}'"
)
else:
model_parameters_sweep_cmds.append(
f"make generate_engines RUN_ARGS='--benchmarks={model_name} --scenarios={scenario}'"
)
for model_name in benchmark_configs["model_name"].split(","):
for scenario in model_parameters[model_name]:
for parameter in model_parameters[model_name][scenario]:
steps = 2 ** (binary_search_steps - 1) + 1
step_interval = round(
(
model_parameters[model_name][scenario][parameter][1]
- model_parameters[model_name][scenario][parameter][0]
)
/ (steps - 1),
2,
)
parameter_current_value = model_parameters[model_name][scenario][
parameter
][0]
while steps > 0:
if accelerator_type == GpuVersion.L4:
model_parameters_sweep_cmds.append(
f"CUDA_VISIBLE_DEVICES=0 make run_harness RUN_ARGS='--benchmarks={model_name} --scenarios={scenario}'"
)
else:
model_parameters_sweep_cmds.append(
f"make run_harness RUN_ARGS='--benchmarks={model_name} --scenarios={scenario}'"
)
current_value_str = str(parameter_current_value)
parameter_current_value = parameter_current_value + step_interval
next_value_str = str(parameter_current_value)
model_parameters_sweep_cmds.append(
f"sed -i '{parameter_positions[model_name][scenario][parameter]}s/{current_value_str}/{next_value_str}/' configs/{model_name}/{scenario}/__init__.py"
)
steps = steps - 1
docker_cmds = [
"make link_dirs",
"make build BUILD_TRTLLM=1",
"pip install huggingface_hub==0.24.7",
"lscpu",
]
if accelerator_type == GpuVersion.L4:
docker_cmds.append(
"sed -i '310s/16/24/' code/common/systems/known_hardware.py"
)
docker_cmds.extend(model_parameters_sweep_cmds)
docker_cmd = " && ".join(docker_cmds)
run_model_cmds = (
"pip install jsonlines",
f"docker restart {docker_container_name}",
f'docker exec -i {docker_container_name} /bin/bash -c "{docker_cmd}"',
make_jsonl_converter_cmd,
"cat jsonl_converter.py",
f"python3 jsonl_converter.py {jsonl_output_path}",
f"cat {jsonl_output_path}",
f"gsutil cp {jsonl_output_path} {metric_config.SshEnvVars.GCS_OUTPUT.value}",
)
job_test_config = test_config.GpuVmTest(
test_config.Gpu(
machine_type=machine_type.value,
image_family=image_family.value,
count=count,
accelerator_type=accelerator_type.value,
runtime_version=RUNTIME_IMAGE,
network=network,
subnetwork=subnetwork,
attach_local_ssd=True
if accelerator_type != GpuVersion.H100
else False,
disk_size_gb=1000,
),
test_name=test_name,
set_up_cmds=set_up_cmds,
run_model_cmds=run_model_cmds,
timeout=datetime.timedelta(minutes=time_out_in_min),
task_owner=test_owner.YIJIA_J,
gcs_subfolder=f"{GCS_SUBFOLDER_PREFIX}/trt_llm_mlperf_v41",
use_existing_instance=existing_instance_name is not None,
)
job_gcp_config = gcp_config.GCPConfig(
project_name=project.value,
zone=gpu_zone.value,
dataset_name=metric_config.DatasetOption.BENCHMARK_DATASET,
)
job_metric_config = metric_config.MetricConfig(
json_lines=metric_config.JSONLinesConfig("metric_report.jsonl"),
use_runtime_generated_gcs_folder=True,
)
return task.GpuCreateResourceTask(
image_project.value,
image_family.value,
job_test_config,
job_gcp_config,
job_metric_config,
existing_instance_name=existing_instance_name,
)