# perfkitbenchmarker/linux_benchmarks/hpcc_benchmark.py
# Copyright 2020 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Runs HPC Challenge.
Homepage: http://icl.cs.utk.edu/hpcc/
Most of the configuration of the HPC-Challenge revolves around HPL, the rest of
the HPCC piggybacks upon the HPL configuration.
Homepage: http://www.netlib.org/benchmark/hpl/
HPL requires a BLAS library (Basic Linear Algebra Subprograms)
OpenBlas: http://www.openblas.net/
Intel MKL: https://software.intel.com/en-us/mkl
HPL also requires a MPI (Message Passing Interface) Library
OpenMPI: http://www.open-mpi.org/
MPI needs to be configured:
Configuring MPI:
http://techtinkering.com/2009/12/02/setting-up-a-beowulf-cluster-using-open-mpi-on-linux/
Once HPL is built the configuration file must be created:
Configuring HPL.dat:
http://www.advancedclustering.com/faq/how-do-i-tune-my-hpldat-file.html
http://www.netlib.org/benchmark/hpl/faqs.html
"""
import dataclasses
import inspect
import logging
import math
import re
from typing import Any, Dict, List, Tuple
from absl import flags
from perfkitbenchmarker import background_tasks
from perfkitbenchmarker import benchmark_spec as bm_spec
from perfkitbenchmarker import configs
from perfkitbenchmarker import data
from perfkitbenchmarker import errors
from perfkitbenchmarker import hpc_util
from perfkitbenchmarker import linux_virtual_machine as linux_vm
from perfkitbenchmarker import regex_util
from perfkitbenchmarker import sample
from perfkitbenchmarker import vm_util
from perfkitbenchmarker.linux_packages import hpcc
from perfkitbenchmarker.linux_packages import intel_repo
from perfkitbenchmarker.linux_packages import intelmpi
from perfkitbenchmarker.linux_packages import mkl
from perfkitbenchmarker.linux_packages import numactl
from perfkitbenchmarker.linux_packages import openblas
FLAGS = flags.FLAGS
LOCAL_HPCCINF_FILE = 'hpccinf.j2'
HPCCINF_FILE = 'hpccinf.txt'
MACHINEFILE = 'machinefile'
BLOCK_SIZE = 192
STREAM_METRICS = ['Copy', 'Scale', 'Add', 'Triad']
MKL_TGZ = 'l_mkl_2018.2.199.tgz'
BENCHMARK_DATA = {
# Intel MKL package downloaded from:
# https://software.intel.com/en-us/mkl
    # To get "l_mkl_2018.2.199.tgz", choose the product
    # "Intel Performance Libraries for Linux*", the version "2018 Update 2",
    # and the download option "Intel Math Kernel Library (Intel MKL)".
MKL_TGZ: 'e28d12173bef9e615b0ded2f95f59a42b3e9ad0afa713a79f8801da2bfb31936',
}
# File for mpirun to run that calls ./hpcc
HPCC_WRAPPER = 'hpcc_wrapper.sh'
BENCHMARK_NAME = 'hpcc'
BENCHMARK_CONFIG = """
hpcc:
description: Runs HPCC. Specify the number of VMs with --num_vms
vm_groups:
default:
vm_spec: *default_dual_core
vm_count: null
"""
SECONDS_PER_HOUR = 60 * 60
@dataclasses.dataclass(frozen=True)
class HpccDimensions:
"""Dimensions for the run.
Replaces values in the data/hpccinf.txt file. For more details see
http://www.netlib.org/benchmark/hpl/tuning.html . The value in quotes after
the field name is the corresponding attribute name in the hpccinf.txt file.
Attributes:
problem_size: 'Ns': the problem size.
block_size: 'NBs': number of blocks.
num_rows: 'Ps': number of rows for each grid.
num_columns: 'Qs': number of columns for each grid.
pfacts: 'PFACTs': matrix-vector operation based factorization.
nbmins: 'NBMINs': the number of columns at which to stop factorization.
rfacts: 'RFACTs': type of recursive panel factorization.
bcasts: 'BCASTs': methodology to broadcast the current panel.
depths: 'DEPTHs': look ahread depth.
swap: swapping algorithm to use.
l1: 'L1': whether the upper triangle of the panel of columns should be
stored in transposed form.
u: 'U': whether the panel of rows U should be stored in transposed form.
equilibration: whether to enable the equilibration phase.
"""
problem_size: int
block_size: int
num_rows: int
num_columns: int
pfacts: int
nbmins: int
rfacts: int
bcasts: int
depths: int
swap: int
l1: int
u: int
equilibration: int
# Translating the --hpcc_ flags into numbers in the HPL configuration file
PFACT_RFACT_MAPPING = {'left': 0, 'crout': 1, 'right': 2}
BCAST_MAPPING = {'1rg': 0, '1rM': 1, '2rg': 2, '2rM': 3, 'Lng': 4, 'LnM': 5}
SWAP_MAPPING = {'bin-exch': 0, 'long': 1, 'mix': 2}
L1_U_MAPPING = {True: 0, False: 1}
EQUILIBRATION_MAPPING = {True: 1, False: 0}
flags.DEFINE_integer(
'memory_size_mb',
None,
'The amount of memory in MB on each machine to use. By '
"default it will use the entire system's memory.",
)
flags.DEFINE_string(
'hpcc_binary',
None,
    'The path of prebuilt hpcc binary to use. If not provided, '
    'this benchmark builds its own using OpenBLAS.',
)
flags.DEFINE_list(
'hpcc_mpi_env',
[],
'Comma separated list containing environment variables '
'to use with mpirun command. e.g. '
'MKL_DEBUG_CPU_TYPE=7,MKL_ENABLE_INSTRUCTIONS=AVX512',
)
flags.DEFINE_float(
'hpcc_timeout_hours',
4,
'The number of hours to wait for the HPCC binary to '
'complete before timing out and assuming it failed.',
)
flags.DEFINE_boolean(
'hpcc_numa_binding',
False,
'If True, attempt numa binding with membind and cpunodebind.',
)
# HPL.dat configuration parameters
CONFIG_PROBLEM_SIZE = flags.DEFINE_integer(
'hpcc_problem_size',
None,
'Size of problems to solve. Leave as None to run one single problem '
'whose size is based on the amount of memory.',
)
CONFIG_BLOCK_SIZE = flags.DEFINE_integer(
'hpcc_block_size',
None,
    'Block size. Leave as None to base it on the amount of memory.',
)
CONFIG_DIMENSIONS = flags.DEFINE_string(
'hpcc_dimensions',
None,
    'Number of rows and columns in the array: "1,2" is 1 row, 2 columns. '
    'Leave as None to select automatically based on the number of CPUs.',
)
CONFIG_PFACTS = flags.DEFINE_enum(
'hpcc_pfacts',
'right',
sorted(PFACT_RFACT_MAPPING),
'What type of matrix-vector operation based factorization to use.',
)
CONFIG_NBMINS = flags.DEFINE_integer(
'hpcc_nbmins',
4,
'The number of columns at which to stop panel factorization.',
)
CONFIG_RFACTS = flags.DEFINE_enum(
'hpcc_rfacts',
'crout',
sorted(PFACT_RFACT_MAPPING),
'The type of recursive panel factorization to use.',
)
CONFIG_BCASTS = flags.DEFINE_enum(
'hpcc_bcasts',
'1rM',
sorted(BCAST_MAPPING),
'The broadcast methodology to use on the current panel.',
)
CONFIG_DEPTHS = flags.DEFINE_integer(
'hpcc_depths',
1,
    'Look-ahead depth. '
    '0: the next panel is factorized only after the current one is completely '
    'finished. '
    '1: the next panel is factorized immediately after the current one is '
    'updated.',
)
CONFIG_SWAP = flags.DEFINE_enum(
'hpcc_swap', 'mix', sorted(SWAP_MAPPING), 'Swapping algorithm to use.'
)
CONFIG_L1 = flags.DEFINE_boolean(
'hpcc_l1', True, 'Whether to store the upper triangle as transposed.'
)
CONFIG_U = flags.DEFINE_boolean(
    'hpcc_u', True, 'Whether to store the panel of rows U as transposed.'
)
CONFIG_EQUILIBRATION = flags.DEFINE_boolean(
'hpcc_equilibration', True, 'Whether to enable the equilibration phase.'
)
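# With the default flag values above, the numbers written into the HPL
# configuration are: pfacts=2 ('right'), rfacts=1 ('crout'), bcasts=1 ('1rM'),
# depths=1, swap=2 ('mix'), l1=0 and u=0 (transposed storage), and
# equilibration=1 (enabled).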
def GetConfig(user_config: Dict[Any, Any]) -> Dict[Any, Any]:
return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
def CheckPrerequisites(_) -> None:
"""Verifies that the required resources are present.
Raises:
perfkitbenchmarker.data.ResourceNotFound: On missing resource.
NotImplementedError: On certain flag combination not currently supported.
"""
data.ResourcePath(LOCAL_HPCCINF_FILE)
if FLAGS['hpcc_binary'].present:
data.ResourcePath(FLAGS.hpcc_binary)
if FLAGS.hpcc_numa_binding and FLAGS.num_vms > 1:
raise errors.Setup.InvalidFlagConfigurationError(
        'NUMA binding with multiple HPCC VMs is not supported.'
)
if CONFIG_DIMENSIONS.value:
parts = CONFIG_DIMENSIONS.value.split(',')
if len(parts) != 2:
raise errors.Setup.InvalidFlagConfigurationError(
          '--hpcc_dimensions must have exactly two comma-separated values '
          f'like "1,2", not "{CONFIG_DIMENSIONS.value}"'
)
if not (parts[0].isnumeric() and parts[1].isnumeric()):
raise errors.Setup.InvalidFlagConfigurationError(
'--hpcc_dimensions must be integers like "1,2" not '
f'"{parts[0]},{parts[1]}"'
)
if hpcc.USE_INTEL_COMPILED_HPL.value:
if FLAGS.hpcc_benchmarks != ['HPL']:
raise errors.Setup.InvalidFlagConfigurationError(
'Intel compiled HPCC can only run linpack (--hpcc_benchmarks=HPL)'
)
def _CalculateHpccDimensions(
num_vms: int, num_cpus: int, vm_memory_size_actual: int
) -> HpccDimensions:
"""Calculates the HPCC dimensions for the run."""
if FLAGS.memory_size_mb:
total_memory = FLAGS.memory_size_mb * 1024 * 1024 * num_vms
else:
total_memory = vm_memory_size_actual * 1024 * num_vms
total_cpus = num_cpus * num_vms
block_size = CONFIG_BLOCK_SIZE.value or BLOCK_SIZE
if CONFIG_PROBLEM_SIZE.value:
problem_size = CONFIG_PROBLEM_SIZE.value
else:
# Finds a problem size that will fit in memory and is a multiple of the
# block size.
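    # Since HPL matrix entries are 8-byte doubles, N = sqrt(0.1 * total_memory)
    # means the matrix occupies 8 * N^2, roughly 80% of total memory, leaving
    # headroom for the OS and MPI buffers.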
base_problem_size = math.sqrt(total_memory * 0.1)
blocks = int(base_problem_size / block_size)
blocks = blocks if (blocks % 2) == 0 else blocks - 1
problem_size = block_size * blocks
if CONFIG_DIMENSIONS.value:
num_rows, num_columns = (
int(item) for item in CONFIG_DIMENSIONS.value.split(',')
)
else:
    # Makes the grid as 'square' as possible, with rows <= columns.
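    # e.g. 16 total CPUs gives a 4x4 (P x Q) grid, 30 CPUs gives 5x6.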
sqrt_cpus = int(math.sqrt(total_cpus)) + 1
num_rows = 0
num_columns = 0
for i in reversed(list(range(sqrt_cpus))):
if total_cpus % i == 0:
num_rows = i
num_columns = total_cpus // i
break
return HpccDimensions(
problem_size=problem_size,
block_size=block_size,
num_rows=num_rows,
num_columns=num_columns,
pfacts=PFACT_RFACT_MAPPING[CONFIG_PFACTS.value],
nbmins=CONFIG_NBMINS.value,
rfacts=PFACT_RFACT_MAPPING[CONFIG_RFACTS.value],
bcasts=BCAST_MAPPING[CONFIG_BCASTS.value],
depths=CONFIG_DEPTHS.value,
swap=SWAP_MAPPING[CONFIG_SWAP.value],
l1=L1_U_MAPPING[CONFIG_L1.value],
u=L1_U_MAPPING[CONFIG_U.value],
equilibration=EQUILIBRATION_MAPPING[CONFIG_EQUILIBRATION.value],
)
def CreateHpccinf(
vm: linux_vm.BaseLinuxVirtualMachine, benchmark_spec: bm_spec.BenchmarkSpec
) -> HpccDimensions:
"""Creates the HPCC input file."""
dimensions = _CalculateHpccDimensions(
len(benchmark_spec.vms), vm.NumCpusForBenchmark(), vm.total_free_memory_kb
)
vm.RemoteCommand(f'rm -f {HPCCINF_FILE}')
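  # The template is rendered with dataclasses.asdict(dimensions), so hpccinf.j2
  # can reference the fields by name, e.g. a "{{ problem_size }}" placeholder
  # for Ns (assuming the template uses the dataclass field names).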
vm.RenderTemplate(
data.ResourcePath(LOCAL_HPCCINF_FILE),
remote_path=HPCCINF_FILE,
context=dataclasses.asdict(dimensions),
)
return dimensions
def PrepareHpcc(vm: linux_vm.BaseLinuxVirtualMachine) -> None:
"""Builds HPCC on a single vm."""
logging.info('Building HPCC on %s', vm)
vm.Install('hpcc')
if FLAGS.hpcc_numa_binding:
vm.Install('numactl')
def PrepareBinaries(vms: List[linux_vm.BaseLinuxVirtualMachine]) -> None:
"""Prepare binaries on all vms."""
if hpcc.USE_INTEL_COMPILED_HPL.value:
intelmpi.NfsExportIntelDirectory(vms)
background_tasks.RunThreaded(lambda vm: vm.Install('numactl'), vms)
return
headnode_vm = vms[0]
if FLAGS.hpcc_binary:
headnode_vm.PushFile(data.ResourcePath(FLAGS.hpcc_binary), './hpcc')
else:
headnode_vm.RemoteCommand(f'cp {hpcc.HPCC_DIR}/hpcc hpcc')
background_tasks.RunThreaded(
lambda vm: _PrepareBinaries(headnode_vm, vm), vms[1:]
)
def _PrepareBinaries(
headnode_vm: linux_vm.BaseLinuxVirtualMachine,
vm: linux_vm.BaseLinuxVirtualMachine,
) -> None:
"""Prepares the binaries on the vm."""
vm.Install('fortran')
headnode_vm.MoveFile(vm, 'hpcc', 'hpcc')
headnode_vm.MoveFile(vm, '/usr/bin/orted', 'orted')
vm.RemoteCommand('sudo mv orted /usr/bin/orted')
if FLAGS.hpcc_math_library == hpcc.HPCC_MATH_LIBRARY_MKL:
intel_repo.CopyIntelFiles(headnode_vm, vm)
def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None:
"""Install HPCC on the target vms.
Args:
benchmark_spec: The benchmark specification. Contains all data that is
required to run the benchmark.
"""
vms = benchmark_spec.vms
headnode_vm = vms[0]
PrepareHpcc(headnode_vm)
CreateHpccinf(headnode_vm, benchmark_spec)
hpc_util.CreateMachineFile(vms, remote_path=MACHINEFILE)
headnode_vm.AuthenticateVm()
PrepareBinaries(vms)
def BaseMetadata(vm: linux_vm.BaseLinuxVirtualMachine) -> Dict[str, str]:
"""Update metadata with hpcc-related flag values."""
metadata = {}
metadata['memory_size_mb'] = FLAGS.memory_size_mb
if FLAGS['hpcc_binary'].present:
metadata['override_binary'] = FLAGS.hpcc_binary
if FLAGS['hpcc_mpi_env'].present:
metadata['mpi_env'] = FLAGS.hpcc_mpi_env
metadata['hpcc_math_library'] = FLAGS.hpcc_math_library
metadata['hpcc_version'] = hpcc.HPCC_VERSION
if FLAGS.hpcc_benchmarks:
metadata['hpcc_benchmarks'] = FLAGS.hpcc_benchmarks
if FLAGS.hpcc_math_library == hpcc.HPCC_MATH_LIBRARY_MKL:
metadata['math_library_version'] = mkl.MKL_VERSION.value
elif FLAGS.hpcc_math_library == hpcc.HPCC_MATH_LIBRARY_OPEN_BLAS:
metadata['math_library_version'] = openblas.GetVersion(vm)
metadata['openmpi_version'] = FLAGS.openmpi_version
if FLAGS.hpcc_numa_binding:
metadata['hpcc_numa_binding'] = FLAGS.hpcc_numa_binding
if hpcc.USE_INTEL_COMPILED_HPL.value:
metadata['hpcc_origin'] = 'intel'
metadata['intel_mpi_version'] = intelmpi.MPI_VERSION.value
else:
metadata['hpcc_origin'] = 'source'
return metadata
def ParseOutput(hpcc_output: str) -> List[sample.Sample]:
"""Parses the output from HPCC.
Args:
hpcc_output: A string containing the text of hpccoutf.txt.
Returns:
A list of samples to be published (in the same format as Run() returns).
"""
results = []
# Parse all metrics from metric=value lines in the HPCC output.
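  # The summary section of hpccoutf.txt consists of "name=value" lines such as
  # "HPL_Tflops=0.14511" or "StarSTREAM_Triad=5.00801" (values illustrative),
  # which ExtractAllFloatMetrics turns into a {name: float} dict.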
metric_values = regex_util.ExtractAllFloatMetrics(hpcc_output)
# For each benchmark that is run, collect the metrics and metadata for that
# benchmark from the metric_values map.
benchmarks_run = FLAGS.hpcc_benchmarks or hpcc.HPCC_METRIC_MAP
for benchmark in benchmarks_run:
for metric, units in hpcc.HPCC_METRIC_MAP[benchmark].items():
value = metric_values[metric]
      # Common metadata for all runs is added in Run's call to
      # _AddCommonMetadata.
metadata = {
metadata_item: metric_values[metadata_item]
for metadata_item in hpcc.HPCC_METADATA_MAP[benchmark]
}
results.append(sample.Sample(metric, value, units, metadata))
return results
def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> List[sample.Sample]:
"""Run HPCC on the cluster.
Args:
benchmark_spec: The benchmark specification. Contains all data that is
required to run the benchmark.
Returns:
A list of sample.Sample objects.
"""
  # Recreate the HPL config file on each run in case parameters change.
dimensions = CreateHpccinf(benchmark_spec.vms[0], benchmark_spec)
logging.info('HPL.dat dimensions: %s', dimensions)
if hpcc.USE_INTEL_COMPILED_HPL.value:
samples = [RunIntelLinpack(benchmark_spec.vms, dimensions)]
else:
samples = RunHpccSource(benchmark_spec.vms)
_AddCommonMetadata(samples, benchmark_spec, dataclasses.asdict(dimensions))
return samples
def _AddCommonMetadata(
samples: List[sample.Sample],
benchmark_spec: bm_spec.BenchmarkSpec,
dimensions: Dict[str, Any],
) -> None:
"""Adds metadata common to all samples."""
for item in samples:
item.metadata.update(BaseMetadata(benchmark_spec.vms[0]))
item.metadata['num_machines'] = len(benchmark_spec.vms)
item.metadata.update(dimensions)
def RunHpccSource(
vms: List[linux_vm.BaseLinuxVirtualMachine],
) -> List[sample.Sample]:
"""Returns the parsed output from running the compiled from source HPCC."""
headnode_vm = vms[0]
  # Back up existing HPCC output, if any.
headnode_vm.RemoteCommand(
'if [ -f hpccoutf.txt ]; then '
'mv hpccoutf.txt hpccoutf-$(date +%s).txt; '
'fi'
)
num_processes = len(vms) * headnode_vm.NumCpusForBenchmark()
run_as_root = '--allow-run-as-root' if FLAGS.mpirun_allow_run_as_root else ''
mpi_flags = (
f'-machinefile {MACHINEFILE} --mca orte_rsh_agent '
f'"ssh -o StrictHostKeyChecking=no" {run_as_root} {_MpiEnv()}'
)
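  # Without NUMA binding, the assembled command looks roughly like the
  # following (assuming a 2-VM x 16-vCPU cluster, no
  # --mpirun_allow_run_as_root, and no extra MPI env vars):
  #   mpirun -np 32 -machinefile machinefile \
  #       --mca orte_rsh_agent "ssh -o StrictHostKeyChecking=no" ./hpcc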
mpi_cmd = 'mpirun '
hpcc_exec = './hpcc'
if FLAGS.hpcc_math_library == hpcc.HPCC_MATH_LIBRARY_MKL:
    # Must exec the HPCC wrapper script to pick up the location of libiomp5.so.
background_tasks.RunThreaded(_CreateHpccWrapper, vms)
hpcc_exec = f'./{HPCC_WRAPPER}'
if FLAGS.hpcc_numa_binding:
numa_map = numactl.GetNuma(headnode_vm)
numa_hpcc_cmd = []
for node, num_cpus in numa_map.items():
numa_hpcc_cmd.append(
f'-np {num_cpus} {mpi_flags} '
f'numactl --cpunodebind {node} '
f'--membind {node} {hpcc_exec}'
)
mpi_cmd += ' : '.join(numa_hpcc_cmd)
else:
mpi_cmd += f'-np {num_processes} {mpi_flags} {hpcc_exec}'
headnode_vm.RobustRemoteCommand(
f'ulimit -n 32768; {mpi_cmd}',
timeout=int(FLAGS.hpcc_timeout_hours * SECONDS_PER_HOUR),
)
logging.info('HPCC Results:')
stdout, _ = headnode_vm.RemoteCommand('cat hpccoutf.txt')
if stdout.startswith('HPL ERROR'):
# Annoyingly the mpi_cmd will succeed when there is an HPL error
raise errors.Benchmarks.RunError(f'Error running HPL: {stdout}')
return ParseOutput(stdout)
def _CreateHpccWrapper(vm: linux_vm.BaseLinuxVirtualMachine) -> None:
"""Creates a bash script to run HPCC on the VM.
This is required for when MKL is installed via the Intel repos as the
libiomp5.so file is not in /lib but rather in one found via sourcing the
mklvars.sh file.
Args:
vm: Virtual machine to put file on.
"""
text = ['#!/bin/bash', mkl.SourceVarsCommand(), './hpcc']
vm_util.CreateRemoteFile(vm, '\n'.join(text), HPCC_WRAPPER)
vm.RemoteCommand(f'chmod +x {HPCC_WRAPPER}')
def _MpiEnv(mpi_flag: str = '-x') -> str:
"""Returns the --hpcc_mpi_env flags as a string for the mpirun command."""
return ' '.join([f'{mpi_flag} {v}' for v in FLAGS.hpcc_mpi_env])
def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None:
"""Cleanup HPCC on the cluster.
Args:
benchmark_spec: The benchmark specification. Contains all data that is
required to run the benchmark.
"""
vms = benchmark_spec.vms
headnode_vm = vms[0]
headnode_vm.RemoveFile('hpcc*')
headnode_vm.RemoveFile(MACHINEFILE)
for vm in vms[1:]:
vm.RemoveFile('hpcc')
vm.RemoveFile('/usr/bin/orted')
def RunIntelLinpack(
vms: List[linux_vm.BaseLinuxVirtualMachine], dimensions: HpccDimensions
) -> sample.Sample:
"""Returns the parsed output from running the Intel compiled HPCC.
Unlike the compiled from source linpack run the Intel compiled linpack can
handle being cut off after --hpcc_timeout_hours as it parses the continuous
output of linpack, reporting the last value found as the HPL_Tflops.
The metadata argument value for "last_fraction_completed" is how much of the
run was completed before being cut off.
Args:
vms: List of VMs to run benchmark on.
dimensions: The HPCC configuration.
Returns: Sample of the HPL_Tflops for the run.
"""
vm = vms[0]
  # HPL compiled from source uses hpccinf.txt; the Intel build uses HPL.dat.
vm.RemoteCommand(f'cp {HPCCINF_FILE} HPL.dat')
mpi_cmd, num_processes = _CreateIntelMpiRunCommand(vms, dimensions)
run_cmd_txt, _ = vm.RobustRemoteCommand(
mpi_cmd,
ignore_failure=True,
timeout=int(FLAGS.hpcc_timeout_hours * SECONDS_PER_HOUR),
)
file_text, _ = vm.RemoteCommand('cat HPL.out', ignore_failure=True)
tflops, metadata = _ParseIntelLinpackStdout(run_cmd_txt)
if file_text:
# HPL ran to completion, use the tflops from the file output
tflops = _ParseIntelLinpackOutputFile(file_text)
metadata['full'] = True
else:
    # HPL timed out, but we still have fractional metadata.
metadata['full'] = False
metadata.update({
'num_processes': num_processes,
'per_host': vm.numa_node_count,
'mpi_cmd': mpi_cmd,
})
return sample.Sample('HPL_Tflops', tflops, 'Tflops/s', metadata)
def _CreateIntelMpiRunCommand(
vms: List[linux_vm.BaseLinuxVirtualMachine], dimensions: HpccDimensions
) -> Tuple[str, int]:
"""Creates the command to run HPL for Intel compiled linpack.
Args:
vms: List of virtual machines to run on.
dimensions: The HpccDimensions for the run
Returns:
Tuple of the mpirun command and the number of processes to be used.
"""
headnode = vms[0]
# Create the file for mpirun to execute
hpl_path = '/opt/intel/mkl/benchmarks/mp_linpack/xhpl_intel64_static'
bash_script = inspect.cleandoc(f"""
#!/bin/bash
export HPL_HOST_NODE=$((PMI_RANK % {headnode.numa_node_count}))
{hpl_path}
""")
run_file = './hpl_run'
for vm in vms:
vm_util.CreateRemoteFile(vm, bash_script + '\n', run_file)
vm.RemoteCommand(f'chmod +x {run_file}')
logging.info('Using precompiled HPL at %s', hpl_path)
num_processes = dimensions.num_rows * dimensions.num_columns
hosts = ','.join([vm.internal_ip for vm in vms])
mpi_cmd = (
f'{intelmpi.SourceMpiVarsCommand(headnode)}; '
'mpirun '
f'-perhost {headnode.numa_node_count} {_MpiEnv("-genv")} '
f'-np {num_processes} -host {hosts} {run_file}'
)
return mpi_cmd, num_processes
def _ParseIntelLinpackOutputFile(file_text: str) -> float:
"""Returns the tflops for the hpcc run.
The last entry that matches
WR11C2R4 50688 192 6 10 551.85 1.57334e+02
is the Gflops for the run: 157.33
Args:
file_text: The hpcc output file contents.
"""
line_re = re.compile(
r'\s+'.join([
r'WR\S+',
r'\d+',
r'\d+',
r'\d+',
r'\d+',
r'\d+\.\d+',
r'([\d\.e\+\-]+)',
])
)
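  # The fields correspond to the HPL result line: test variant (WR...), N, NB,
  # P, Q, wall time, and Gflops; only the final Gflops value is captured.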
gflops = None
for line in file_text.splitlines():
match = line_re.match(line)
if match:
gflops = float(match[1])
return gflops / 1000
def _ParseIntelLinpackStdout(stdout: str) -> Tuple[float, Dict[str, float]]:
"""Parse the stdout of Intel HPL returning a condensed sample of results.
Sample stdout:
pkb-123-0 : Column=000576 Fraction=0.005 Kernel= 0.58 Mflops=1265648.19
pkb-123-0 : Column=001152 Fraction=0.010 Kernel=969908.14 Mflops=1081059.81
pkb-123-0 : Column=001728 Fraction=0.015 Kernel=956391.64 Mflops=1040609.60
Return:
1.0406096,
{'fractions': '0.01,0.015',
'kernel_tflops': '0.96990814,0.95639164',
'last_fraction_completed': 0.015,
'tflops': '1.08105981,1.0406096'
}
Args:
stdout: The stdout text from running HPL
Returns:
Tuple of the tflops/s and a dict of the fractional run information.
Raises:
ValueError: If no metrics could be found.
"""
line_re = re.compile(
r"""Column=\s*(?P<column>\d+)
\s*Fraction=\s*(?P<fraction>[\d\.]+)
\s*Kernel=\s*(?P<kernel>[\d\.]+)
\s*Mflops=\s*(?P<mflops>[\d\.]+)""",
re.X,
)
fractions = []
kernel_tflops = []
tflops = []
line_matches = line_re.finditer(stdout)
try:
    next(line_matches)  # The first reported values are artificially low.
except StopIteration:
raise ValueError(
f'Could not find a line in stdout to match {line_re.pattern}: {stdout}'
)
for line_match in line_matches:
fractions.append(float(line_match['fraction']))
kernel_tflops.append(float(line_match['kernel']) / 1e6)
tflops.append(float(line_match['mflops']) / 1e6)
if not tflops:
raise ValueError('No metrics found in stdout')
# Grab all the I_MPI* environment variables in debug output to put in metadata
intel_env_re = re.compile(
r'(.*MPI startup.*?)?\s*' r'(?P<key>I_MPI[A-Z_\d]+)=(?P<value>.*)\s*'
)
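  # Matches Intel MPI debug lines such as '[0] MPI startup(): I_MPI_DEBUG=5'
  # (illustrative) as well as bare 'I_MPI_...=value' assignments.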
env_vars = {row['key']: row['value'] for row in intel_env_re.finditer(stdout)}
env_vars.pop('I_MPI_HYDRA_UUID', None)
metadata = {
'fractions': ','.join([str(x) for x in fractions]),
'kernel_tflops': ','.join([str(x) for x in kernel_tflops]),
'tflops': ','.join([str(x) for x in tflops]),
'last_fraction_completed': fractions[-1],
'intel_mpi_env': vm_util.DictionaryToEnvString(env_vars, ';'),
}
return tflops[-1], metadata