perfkitbenchmarker/pkb.py

# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Runs benchmarks in PerfKitBenchmarker.

All benchmarks in PerfKitBenchmarker export the following interface:

GetConfig: this returns the name of the benchmark, the number of machines
  required to run one instance of the benchmark, a detailed description of
  the benchmark, and whether the benchmark requires a scratch disk.
Prepare: this function takes a list of VMs as an input parameter. The
  benchmark will then get all binaries required to run the benchmark and, if
  required, create data files.
Run: this function takes a list of VMs as an input parameter. The benchmark
  will then run the benchmark upon the machines specified. The function will
  return a dictionary containing the results of the benchmark.
Cleanup: this function takes a list of VMs as an input parameter. The
  benchmark will then return the machines to the state they were in before
  Prepare was called.

PerfKitBenchmarker has the following run stages: provision, prepare, run,
cleanup, teardown, and all.

provision: Read command-line flags, decide what benchmarks to run, and create
  the necessary resources for each benchmark, including networks, VMs, disks,
  and keys, and generate a run_uri, which can be used to resume execution at
  later stages.
prepare: Execute the Prepare function of each benchmark to install necessary
  software, upload data files, etc.
run: Execute the Run function of each benchmark and collect the generated
  samples. The publisher may publish these samples according to PKB's
  settings. The run stage can be called multiple times with the run_uri
  generated by the provision stage.
cleanup: Execute the Cleanup function of each benchmark to uninstall software
  and delete data files.
teardown: Delete VMs, key files, networks, and disks created in the
  'provision' stage.
all: PerfKitBenchmarker will run all of the above stages (provision, prepare,
  run, cleanup, teardown). Any resources generated in the provision stage
  will be automatically deleted in the teardown stage, even if there is an
  error in an earlier stage. When PKB is running in this mode, the run cannot
  be repeated or resumed using the run_uri.
""" import collections from collections.abc import Mapping, MutableSequence import copy import itertools import json import logging import multiprocessing from os.path import isfile import pickle import random import re import sys import threading import time import types from typing import Any, Collection, Dict, List, Sequence, Set, Tuple, Type import uuid from absl import flags from perfkitbenchmarker import archive from perfkitbenchmarker import background_tasks from perfkitbenchmarker import benchmark_lookup from perfkitbenchmarker import benchmark_sets from perfkitbenchmarker import benchmark_spec as bm_spec from perfkitbenchmarker import benchmark_status from perfkitbenchmarker import configs from perfkitbenchmarker import context from perfkitbenchmarker import errors from perfkitbenchmarker import events from perfkitbenchmarker import flag_alias from perfkitbenchmarker import flag_util from perfkitbenchmarker import flags as pkb_flags from perfkitbenchmarker import linux_benchmarks from perfkitbenchmarker import linux_virtual_machine from perfkitbenchmarker import log_util from perfkitbenchmarker import os_types from perfkitbenchmarker import package_lookup from perfkitbenchmarker import providers from perfkitbenchmarker import publisher from perfkitbenchmarker import requirements from perfkitbenchmarker import sample from perfkitbenchmarker import stages from perfkitbenchmarker import static_virtual_machine from perfkitbenchmarker import time_triggers from perfkitbenchmarker import timing_util from perfkitbenchmarker import traces from perfkitbenchmarker import version from perfkitbenchmarker import virtual_machine from perfkitbenchmarker import vm_util from perfkitbenchmarker import windows_benchmarks from perfkitbenchmarker.configs import benchmark_config_spec from perfkitbenchmarker.linux_benchmarks import cluster_boot_benchmark from perfkitbenchmarker.linux_benchmarks import cuda_memcpy_benchmark from perfkitbenchmarker.linux_packages import build_tools # Add additional flags to ./flags.py # Keeping this flag here rather than flags.py to avoid a circular dependency # on benchmark_status. _RETRY_SUBSTATUSES = flags.DEFINE_multi_enum( 'retry_substatuses', benchmark_status.FailedSubstatus.RETRYABLE_SUBSTATUSES, benchmark_status.FailedSubstatus.RETRYABLE_SUBSTATUSES, 'The failure substatuses to retry on. By default, failed runs are run with ' 'the same previous config.', ) COMPLETION_STATUS_FILE_NAME = 'completion_statuses.json' REQUIRED_INFO = ['scratch_disk', 'num_machines'] REQUIRED_EXECUTABLES = frozenset(['ssh', 'ssh-keygen', 'scp', 'openssl']) MAX_RUN_URI_LENGTH = 12 FLAGS = flags.FLAGS # Define patterns for help text processing. BASE_RELATIVE = ( # Relative path from markdown output to PKB home for link writing. '../' ) MODULE_REGEX = r'^\s+?(.*?):.*' # Pattern that matches module names. FLAGS_REGEX = r'(^\s\s--.*?(?=^\s\s--|\Z))+?' # Pattern that matches each flag. FLAGNAME_REGEX = ( # Pattern that matches flag name in each flag. r'^\s+?(--.*?)(:.*\Z)' ) DOCSTRING_REGEX = ( # Pattern that matches triple quoted comments. r'"""(.*?|$)"""' ) _TEARDOWN_EVENT = multiprocessing.Event() _ANY_ZONE = 'any' events.register_tracers.connect(traces.RegisterAll) events.register_tracers.connect(time_triggers.RegisterAll) @flags.multi_flags_validator( ['smart_quota_retry', 'smart_capacity_retry', 'retries', 'zones', 'zone'], message=( 'Smart zone retries requires exactly one single zone from --zones ' 'or --zone, as well as retry count > 0.' 
), ) def ValidateSmartZoneRetryFlags(flags_dict): """Validates smart zone retry flags.""" if flags_dict['smart_quota_retry'] or flags_dict['smart_capacity_retry']: if flags_dict['retries'] == 0: return False return (len(flags_dict['zones']) == 1 and not flags_dict['zone']) or ( len(flags_dict['zone']) == 1 and not flags_dict['zones'] ) return True @flags.multi_flags_validator( ['retries', 'run_stage'], message='Retries requires running all stages of the benchmark.', ) def ValidateRetriesAndRunStages(flags_dict): if flags_dict['retries'] > 0 and flags_dict['run_stage'] != stages.STAGES: return False return True def ParseSkipTeardownConditions( skip_teardown_conditions: Collection[str], ) -> Mapping[str, Mapping[str, float | None]]: """Parses the skip_teardown_conditions flag. Used by the validator below and flag_util.ShouldTeardown to separate conditions passed by the --skip_teardown_conditions flag into three tokens: metric, lower bound, upper_bound Initial regex parsing captures a metric (any string before a > or <), direction (the > or <), and a threshold (any number after the direction). Args: skip_teardown_conditions: list of conditions to parse Returns: list of tuples of (metric, lower_bound, upper_bound) Raises: ValueError: if any condition is invalid """ parsed_conditions = {} pattern = re.compile( r""" ([\w -]+) # Matches all characters that could appear in a metric name ([<>]) # Matches < or > ([\d+\.]+) # Matches any floating point number """, re.VERBOSE, ) for condition in skip_teardown_conditions: match = pattern.match(condition) if not match or len(match.groups()) != 3: raise ValueError( 'Invalid skip_teardown_conditions flag. Conditions must be in the ' 'format of:\n' '<metric><direction><threshold>;...;...\n' 'where metric is any string, direction is either > or <, and ' 'threshold is any number.' ) metric, direction, threshold = match.groups() # Raises ValueError if threshold is not a valid number. threshold = float(threshold) lower_bound = threshold if direction == '>' else None upper_bound = threshold if direction == '<' else None if metric not in parsed_conditions: parsed_conditions[metric] = { 'lower_bound': lower_bound, 'upper_bound': upper_bound, } continue # Update the existing metric's bound(s) if necessary. current_lower_bound = parsed_conditions[metric]['lower_bound'] if lower_bound is not None and ( current_lower_bound is None or lower_bound < current_lower_bound ): parsed_conditions[metric]['lower_bound'] = lower_bound current_upper_bound = parsed_conditions[metric]['upper_bound'] if upper_bound is not None and ( current_upper_bound is None or upper_bound > current_upper_bound ): parsed_conditions[metric]['upper_bound'] = upper_bound return parsed_conditions @flags.validator( 'skip_teardown_conditions', message='Invalid skip_teardown_conditions flag.', ) def ValidateSkipTeardownConditions(flags_dict: Mapping[str, Any]) -> bool: """Validates skip_teardown_conditions flag.""" if 'skip_teardown_conditions' not in flags_dict: return True try: ParseSkipTeardownConditions(flags_dict['skip_teardown_conditions']) return True except ValueError: return False def MetricMeetsConditions( metric_sample: Mapping[str, Any], conditions: Mapping[str, Mapping[str, float | None]], ) -> bool: """Checks if a metric sample meets any conditions. If a metric falls within the bounds of a condition, log the metric and the condition. 
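  Example (metric name and values are hypothetical, shown only to illustrate
  the shape of the inputs):

    conditions = {'throughput_mbps': {'lower_bound': 1000.0,
                                      'upper_bound': None}}
    MetricMeetsConditions(
        {'metric': 'throughput_mbps', 'value': 1250.0, 'unit': 'Mbps'},
        conditions)  # -> True, since 1250.0 exceeds the 1000.0 lower bound.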
Args: metric_sample: The metric sample to check conditions: The conditions to check against Returns: True if the metric sample meets any of the conditions, False otherwise. """ if metric_sample['metric'] not in conditions: return False target_condition = conditions[metric_sample['metric']] lower_bound = target_condition['lower_bound'] upper_bound = target_condition['upper_bound'] lower_bound_satisfied = ( lower_bound is not None and metric_sample['value'] > lower_bound ) upper_bound_satisfied = ( upper_bound is not None and metric_sample['value'] < upper_bound ) if lower_bound_satisfied and upper_bound_satisfied: logging.info( 'Skip teardown condition met: %s is greater than %s %s and less' ' than %s %s', metric_sample['metric'], lower_bound, metric_sample['unit'], upper_bound, metric_sample['unit'], ) return True # Requires that a metric meet both thresholds if lower_bound < upper_bound. elif ( lower_bound is not None and upper_bound is not None and lower_bound < upper_bound ): return False elif lower_bound_satisfied: logging.info( 'Skip teardown condition met: %s is greater than %s %s', metric_sample['metric'], lower_bound, metric_sample['unit'], ) return True elif upper_bound_satisfied: logging.info( 'Skip teardown condition met: %s is less than %s %s', metric_sample['metric'], upper_bound, metric_sample['unit'], ) return True return False def ShouldTeardown( skip_teardown_conditions: Mapping[str, Mapping[str, float | None]], samples: MutableSequence[Mapping[str, Any]], vms: Sequence[virtual_machine.BaseVirtualMachine] | None = None, skip_teardown_zonal_vm_limit: int | None = None, skip_teardown_on_command_timeout: bool = False, ) -> bool: """Checks all samples against all skip teardown conditions. Args: skip_teardown_conditions: list of tuples of: (metric, lower_bound, upper_bound) samples: list of samples to check against the conditions vms: list of VMs brought up by the benchmark skip_teardown_zonal_vm_limit: the maximum number of VMs in the zone that can be left behind. skip_teardown_on_command_timeout: a boolean indicating whether to skip teardown if the failure substatus is COMMAND_TIMEOUT Returns: True if the benchmark should teardown as usual, False if it should skip due to a condition being met. """ if not skip_teardown_conditions and not skip_teardown_on_command_timeout: return True if skip_teardown_on_command_timeout: for status_sample in samples: if ( status_sample['metadata'].get('failed_substatus') == benchmark_status.FailedSubstatus.COMMAND_TIMEOUT ): logging.warning( 'Skipping TEARDOWN phase due to COMMAND_TIMEOUT substatus.' ) return False if skip_teardown_zonal_vm_limit: for vm in vms: num_lingering_vms = vm.GetNumTeardownSkippedVms() if ( num_lingering_vms is not None and num_lingering_vms + len(vms) > skip_teardown_zonal_vm_limit ): logging.warning( 'Too many lingering VMs: tearing down resources regardless of skip' ' teardown conditions.' ) return True for metric_sample in samples: if MetricMeetsConditions(metric_sample, skip_teardown_conditions): logging.warning('Skipping TEARDOWN phase.') return False return True def _InjectBenchmarkInfoIntoDocumentation(): """Appends each benchmark's information to the main module's docstring.""" # TODO: Verify if there is other way of appending additional help # message. # Inject more help documentation # The following appends descriptions of the benchmarks and descriptions of # the benchmark sets to the help text. 
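  # Roughly (the version, benchmark name, and set message below are
  # illustrative only), the injected help text ends up shaped like:
  #
  #   PerfKitBenchmarker version: v1.2.3
  #
  #   <this module's docstring>
  #   Benchmarks (default requirements):
  #       iperf: Run iperf (2 VMs)
  #
  #   Benchmark Sets:
  #       standard_set: The standard set of benchmarks to run.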
benchmark_sets_list = [ '%s: %s' % (set_name, benchmark_sets.BENCHMARK_SETS[set_name]['message']) for set_name in benchmark_sets.BENCHMARK_SETS ] sys.modules['__main__'].__doc__ = ( 'PerfKitBenchmarker version: {version}\n\n{doc}\n' 'Benchmarks (default requirements):\n' '\t{benchmark_doc}' ).format( version=version.VERSION, doc=__doc__, benchmark_doc=_GenerateBenchmarkDocumentation(), ) sys.modules['__main__'].__doc__ += '\n\nBenchmark Sets:\n\t%s' % '\n\t'.join( benchmark_sets_list ) def _ParseFlags(argv): """Parses the command-line flags.""" try: argv = FLAGS(argv) except flags.Error as e: logging.error(e) logging.info('For usage instructions, use --helpmatch={module_name}') logging.info('For example, ./pkb.py --helpmatch=benchmarks.fio') sys.exit(1) def _PrintHelp(matches=None): """Prints help for flags defined in matching modules. Args: matches: regex string or None. Filters help to only those whose name matched the regex. If None then all flags are printed. """ if not matches: print(FLAGS) else: flags_by_module = FLAGS.flags_by_module_dict() modules = sorted(flags_by_module) regex = re.compile(matches) for module_name in modules: if regex.search(module_name): print(FLAGS.module_help(module_name)) def _PrintHelpMD(matches=None): """Prints markdown formatted help for flags defined in matching modules. Works just like --helpmatch. Args: matches: regex string or None. Filters help to only those whose name matched the regex. If None then all flags are printed. Raises: RuntimeError: If unable to find module help. Eg: * all flags: `./pkb.py --helpmatchmd .*` > testsuite_docs/all.md * linux benchmarks: `./pkb.py --helpmatchmd linux_benchmarks.*` > testsuite_docs/linux_benchmarks.md * specific modules `./pkb.py --helpmatchmd iperf` > testsuite_docs/iperf.md * windows packages `./pkb.py --helpmatchmd windows_packages.*` > testsuite_docs/windows_packages.md * GCP provider: `./pkb.py --helpmatchmd providers.gcp.* > testsuite_docs/providers_gcp.md` """ flags_by_module = FLAGS.flags_by_module_dict() modules = sorted(flags_by_module) regex = re.compile(matches) for module_name in modules: if regex.search(module_name): # Compile regex patterns. module_regex = re.compile(MODULE_REGEX) flags_regex = re.compile(FLAGS_REGEX, re.MULTILINE | re.DOTALL) flagname_regex = re.compile(FLAGNAME_REGEX, re.MULTILINE | re.DOTALL) docstring_regex = re.compile(DOCSTRING_REGEX, re.MULTILINE | re.DOTALL) # Retrieve the helpmatch text to format. helptext_raw = FLAGS.module_help(module_name) # Converts module name to github linkable string. # eg: perfkitbenchmarker.linux_benchmarks.iperf_vpn_benchmark -> # perfkitbenchmarker/linux_benchmarks/iperf_vpn_benchmark.py match = re.search( module_regex, helptext_raw, ) if not match: raise RuntimeError( f'Unable to find "{module_regex}" in "{helptext_raw}"' ) module = match.group(1) module_link = module.replace('.', '/') + '.py' # Put flag name in a markdown code block for visibility. flags = re.findall(flags_regex, helptext_raw) flags[:] = [flagname_regex.sub(r'`\1`\2', flag) for flag in flags] # Get the docstring for the module without importing everything into our # namespace. Probably a better way to do this docstring = 'No description available' # Only pull doststrings from inside pkb source files. if isfile(module_link): with open(module_link) as f: source = f.read() # Get the triple quoted matches. docstring_match = re.search(docstring_regex, source) # Some modules don't have docstrings. 
# eg perfkitbenchmarker/providers/alicloud/flags.py if docstring_match is not None: docstring = docstring_match.group(1) # Format output and print here. if isfile(module_link): # Only print links for modules we can find. print('### [' + module, '](' + BASE_RELATIVE + module_link + ')\n') else: print('### ' + module + '\n') print('#### Description:\n\n' + docstring + '\n\n#### Flags:\n') print('\n'.join(flags) + '\n') def CheckVersionFlag(): """If the --version flag was specified, prints the version and exits.""" if FLAGS.version: print(version.VERSION) sys.exit(0) def _InitializeRunUri(): """Determines the PKB run URI and sets FLAGS.run_uri.""" if FLAGS.run_uri is None: if stages.PROVISION in FLAGS.run_stage: FLAGS.run_uri = str(uuid.uuid4())[-8:] else: # Attempt to get the last modified run directory. run_uri = vm_util.GetLastRunUri() if run_uri: FLAGS.run_uri = run_uri logging.warning( 'No run_uri specified. Attempting to run the following stages with ' '--run_uri=%s: %s', FLAGS.run_uri, ', '.join(FLAGS.run_stage), ) else: raise errors.Setup.NoRunURIError( 'No run_uri specified. Could not run the following stages: %s' % ', '.join(FLAGS.run_stage) ) elif not FLAGS.run_uri.isalnum() or len(FLAGS.run_uri) > MAX_RUN_URI_LENGTH: raise errors.Setup.BadRunURIError( 'run_uri must be alphanumeric and less ' 'than or equal to %d characters in ' 'length.' % MAX_RUN_URI_LENGTH ) def _CreateBenchmarkSpecs(): """Create a list of BenchmarkSpecs for each benchmark run to be scheduled. Returns: A list of BenchmarkSpecs. """ specs = [] benchmark_tuple_list = benchmark_sets.GetBenchmarksFromFlags() benchmark_counts = collections.defaultdict(itertools.count) for benchmark_module, user_config in benchmark_tuple_list: # Construct benchmark config object. name = benchmark_module.BENCHMARK_NAME # This expected_os_type check seems rather unnecessary. expected_os_types = os_types.ALL with flag_util.OverrideFlags(FLAGS, user_config.get('flags')): config_dict = benchmark_module.GetConfig(user_config) config_spec_class = getattr( benchmark_module, 'BENCHMARK_CONFIG_SPEC_CLASS', benchmark_config_spec.BenchmarkConfigSpec, ) config = config_spec_class( name, expected_os_types=expected_os_types, flag_values=FLAGS, **config_dict, ) # Assign a unique ID to each benchmark run. This differs even between two # runs of the same benchmark within a single PKB run. uid = name + str(next(benchmark_counts[name])) # Optional step to check flag values and verify files exist. check_prereqs = getattr(benchmark_module, 'CheckPrerequisites', None) if check_prereqs: try: with config.RedirectFlags(FLAGS): check_prereqs(config) except: logging.exception('Prerequisite check failed for %s', name) raise with config.RedirectFlags(FLAGS): specs.append( bm_spec.BenchmarkSpec.GetBenchmarkSpec(benchmark_module, config, uid) ) return specs def _WriteCompletionStatusFile(benchmark_specs, status_file): """Writes a completion status file. The file has one json object per line, each with the following format: { "name": <benchmark name>, "status": <completion status>, "failed_substatus": <failed substatus>, "status_detail": <descriptive string (if present)>, "flags": <flags dictionary> } Args: benchmark_specs: The list of BenchmarkSpecs that ran. status_file: The file object to write the json structures to. 
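  For example, a successful run might be recorded as a line like the
  following (benchmark name and flag values are illustrative):

    {"name": "iperf", "status": "SUCCEEDED",
     "flags": {"machine_type": "n1-standard-2"}}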
""" for spec in benchmark_specs: # OrderedDict so that we preserve key order in json file status_dict = collections.OrderedDict() status_dict['name'] = spec.name status_dict['status'] = spec.status if spec.failed_substatus: status_dict['failed_substatus'] = spec.failed_substatus if spec.status_detail: status_dict['status_detail'] = spec.status_detail status_dict['flags'] = spec.config.flags # Record freeze and restore path values. if pkb_flags.FREEZE_PATH.value: status_dict['flags']['freeze'] = pkb_flags.FREEZE_PATH.value if pkb_flags.RESTORE_PATH.value: status_dict['flags']['restore'] = pkb_flags.RESTORE_PATH.value status_file.write(json.dumps(status_dict) + '\n') def _SetRestoreSpec(spec: bm_spec.BenchmarkSpec) -> None: """Unpickles the spec to restore resources from, if provided.""" restore_path = pkb_flags.RESTORE_PATH.value if restore_path: logging.info('Using restore spec at path: %s', restore_path) with open(restore_path, 'rb') as spec_file: spec.restore_spec = pickle.load(spec_file) def _SetFreezePath(spec: bm_spec.BenchmarkSpec) -> None: """Sets the path to freeze resources to if provided.""" if pkb_flags.FREEZE_PATH.value: spec.freeze_path = pkb_flags.FREEZE_PATH.value logging.info('Using freeze path, %s', spec.freeze_path) def DoProvisionPhase( spec: bm_spec.BenchmarkSpec, timer: timing_util.IntervalTimer ): """Performs the Provision phase of benchmark execution. Args: spec: The BenchmarkSpec created for the benchmark. timer: An IntervalTimer that measures the start and stop times of resource provisioning. """ logging.info('Provisioning resources for benchmark %s', spec.name) events.before_phase.send(stages.PROVISION, benchmark_spec=spec) spec.ConstructResources() spec.CheckPrerequisites() # Pickle the spec before we try to create anything so we can clean # everything up on a second run if something goes wrong. spec.Pickle() events.register_tracers.send(parsed_flags=FLAGS) events.benchmark_start.send(benchmark_spec=spec) try: with timer.Measure('Resource Provisioning'): spec.Provision() finally: # Also pickle the spec after the resources are created so that # we have a record of things like AWS ids. Otherwise we won't # be able to clean them up on a subsequent run. spec.Pickle() events.after_phase.send(stages.PROVISION, benchmark_spec=spec) class InterruptChecker: """An class that check interrupt on VM.""" def __init__(self, vms): """Start check interrupt thread. Args: vms: A list of virtual machines. """ self.vms = vms self.check_threads = [] self.phase_status = threading.Event() for vm in vms: if vm.IsInterruptible(): check_thread = threading.Thread(target=self.CheckInterrupt, args=(vm,)) check_thread.start() self.check_threads.append(check_thread) def CheckInterrupt(self, vm): """Check interrupt. Args: vm: the virtual machine object. Returns: None """ while not self.phase_status.is_set(): vm.UpdateInterruptibleVmStatus(use_api=False) if vm.WasInterrupted(): return else: self.phase_status.wait(vm.GetInterruptableStatusPollSeconds()) def EndCheckInterruptThread(self): """End check interrupt thread.""" self.phase_status.set() for check_thread in self.check_threads: check_thread.join() def EndCheckInterruptThreadAndRaiseError(self): """End check interrupt thread and raise error. Raises: InsufficientCapacityCloudFailure when it catches interrupt. 
Returns: None """ self.EndCheckInterruptThread() if any(vm.IsInterruptible() and vm.WasInterrupted() for vm in self.vms): raise errors.Benchmarks.InsufficientCapacityCloudFailure('Interrupt') def DoPreparePhase( spec: bm_spec.BenchmarkSpec, timer: timing_util.IntervalTimer ): """Performs the Prepare phase of benchmark execution. Args: spec: The BenchmarkSpec created for the benchmark. timer: An IntervalTimer that measures the start and stop times of the benchmark module's Prepare function. """ logging.info('Preparing benchmark %s', spec.name) events.before_phase.send(stages.PREPARE, benchmark_spec=spec) with timer.Measure('BenchmarkSpec Prepare'): spec.Prepare() with timer.Measure('Benchmark Prepare'): spec.BenchmarkPrepare(spec) spec.StartBackgroundWorkload() if FLAGS.after_prepare_sleep_time: logging.info( 'Sleeping for %s seconds after the prepare phase.', FLAGS.after_prepare_sleep_time, ) time.sleep(FLAGS.after_prepare_sleep_time) events.after_phase.send(stages.PREPARE, benchmark_spec=spec) def DoRunPhase( spec: bm_spec.BenchmarkSpec, collector: publisher.SampleCollector, timer: timing_util.IntervalTimer, ): """Performs the Run phase of benchmark execution. Args: spec: The BenchmarkSpec created for the benchmark. collector: The SampleCollector object to add samples to. timer: An IntervalTimer that measures the start and stop times of the benchmark module's Run function. """ if FLAGS.before_run_pause: input('Hit enter to begin Run.') deadline = time.time() + FLAGS.run_stage_time run_number = 0 consecutive_failures = 0 last_publish_time = time.time() def _IsRunStageFinished(): if FLAGS.run_stage_time > 0: return time.time() > deadline else: return run_number >= FLAGS.run_stage_iterations while True: samples = [] logging.info('Running benchmark %s', spec.name) events.before_phase.send(stages.RUN, benchmark_spec=spec) events.trigger_phase.send() try: with timer.Measure('Benchmark Run'): samples = spec.BenchmarkRun(spec) except Exception: consecutive_failures += 1 if consecutive_failures > FLAGS.run_stage_retries: raise logging.exception( 'Run failed (consecutive_failures=%s); retrying.', consecutive_failures, ) else: consecutive_failures = 0 finally: events.after_phase.send(stages.RUN, benchmark_spec=spec) if FLAGS.run_stage_time or FLAGS.run_stage_iterations: for s in samples: s.metadata['run_number'] = run_number # Add boot time metrics on the first run iteration. if run_number == 0 and ( FLAGS.boot_samples or spec.name == cluster_boot_benchmark.BENCHMARK_NAME ): samples.extend(cluster_boot_benchmark.GetTimeToBoot(spec.vms)) # In order to collect GPU samples one of the VMs must have both an Nvidia # GPU and the nvidia-smi if FLAGS.gpu_samples: samples.extend(cuda_memcpy_benchmark.Run(spec)) if FLAGS.record_lscpu: samples.extend(linux_virtual_machine.CreateLscpuSamples(spec.vms)) if FLAGS.record_ulimit: samples.extend(linux_virtual_machine.CreateUlimitSamples(spec.vms)) if pkb_flags.RECORD_PROCCPU.value: samples.extend(linux_virtual_machine.CreateProcCpuSamples(spec.vms)) if FLAGS.record_cpu_vuln and run_number == 0: samples.extend(_CreateCpuVulnerabilitySamples(spec.vms)) if FLAGS.record_gcc: samples.extend(_CreateGccSamples(spec.vms)) if FLAGS.record_glibc: samples.extend(_CreateGlibcSamples(spec.vms)) # Mark samples as restored to differentiate from non freeze/restore runs. 
if FLAGS.restore: for s in samples: s.metadata['restore'] = True events.benchmark_samples_created.send(benchmark_spec=spec, samples=samples) events.all_samples_created.send(benchmark_spec=spec, samples=samples) collector.AddSamples(samples, spec.name, spec) if ( FLAGS.publish_after_run and FLAGS.publish_period is not None and FLAGS.publish_period < (time.time() - last_publish_time) ): collector.PublishSamples() last_publish_time = time.time() if pkb_flags.BETWEEN_RUNS_SLEEP_TIME.value > 0: logging.info( 'Sleeping for %s seconds after run %d.', FLAGS.between_runs_sleep_time, run_number, ) time.sleep(FLAGS.between_runs_sleep_time) run_number += 1 if _IsRunStageFinished(): if FLAGS.after_run_sleep_time: logging.info( 'Sleeping for %s seconds after the run phase.', FLAGS.after_run_sleep_time, ) time.sleep(FLAGS.after_run_sleep_time) break def DoCleanupPhase( spec: bm_spec.BenchmarkSpec, timer: timing_util.IntervalTimer ): """Performs the Cleanup phase of benchmark execution. Cleanup phase work should be delegated to spec.BenchmarkCleanup to allow non-PKB based cleanup if needed. Args: spec: The BenchmarkSpec created for the benchmark. timer: An IntervalTimer that measures the start and stop times of the benchmark module's Cleanup function. """ if FLAGS.before_cleanup_pause: input('Hit enter to begin Cleanup.') logging.info('Cleaning up benchmark %s', spec.name) events.before_phase.send(stages.CLEANUP, benchmark_spec=spec) if ( spec.always_call_cleanup or any([vm.is_static for vm in spec.vms]) or spec.dpb_service is not None ): spec.StopBackgroundWorkload() with timer.Measure('Benchmark Cleanup'): spec.BenchmarkCleanup(spec) events.after_phase.send(stages.CLEANUP, benchmark_spec=spec) def DoTeardownPhase( spec: bm_spec.BenchmarkSpec, collector: publisher.SampleCollector, timer: timing_util.IntervalTimer, ): """Performs the Teardown phase of benchmark execution. Teardown phase work should be delegated to spec.Delete to allow non-PKB based teardown if needed. Args: spec: The BenchmarkSpec created for the benchmark. collector: The SampleCollector object to add samples to (if collecting delete samples) timer: An IntervalTimer that measures the start and stop times of resource teardown. """ logging.info('Tearing down resources for benchmark %s', spec.name) events.before_phase.send(stages.TEARDOWN, benchmark_spec=spec) with timer.Measure('Resource Teardown'): spec.Delete() # Add delete time metrics after metadata collected if pkb_flags.MEASURE_DELETE.value: samples = cluster_boot_benchmark.MeasureDelete(spec.vms) collector.AddSamples(samples, spec.name, spec) events.after_phase.send(stages.TEARDOWN, benchmark_spec=spec) def _SkipPendingRunsFile(): if FLAGS.skip_pending_runs_file and isfile(FLAGS.skip_pending_runs_file): logging.warning( '%s exists. Skipping benchmark.', FLAGS.skip_pending_runs_file ) return True else: return False _SKIP_PENDING_RUNS_CHECKS = [] def RegisterSkipPendingRunsCheck(func): """Registers a function to skip pending runs. Args: func: A function which returns True if pending runs should be skipped. 
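  Example (the sentinel path is illustrative only):

    def _AbortFileExists():
      return isfile('/tmp/pkb_abort')

    RegisterSkipPendingRunsCheck(_AbortFileExists)

  Registered checks are consulted at the start of RunBenchmark; if any of them
  returns True, the pending benchmark run is skipped.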
""" _SKIP_PENDING_RUNS_CHECKS.append(func) @events.before_phase.connect def _PublishStageStartedSamples( sender: str, benchmark_spec: bm_spec.BenchmarkSpec ): """Publish the start of each stage.""" if sender == stages.PROVISION and pkb_flags.CREATE_STARTED_RUN_SAMPLE.value: _PublishRunStartedSample(benchmark_spec) if pkb_flags.CREATE_STARTED_STAGE_SAMPLES.value: _PublishEventSample(benchmark_spec, f'{sender.capitalize()} Stage Started') def _PublishRunStartedSample(spec): """Publishes a sample indicating that a run has started. This sample is published immediately so that there exists some metric for any run (even if the process dies). Args: spec: The BenchmarkSpec object with run information. """ metadata = {'flags': str(flag_util.GetProvidedCommandLineFlags())} # Publish the path to this spec's PKB logs at the start of the runs. if log_util.PKB_LOG_BUCKET.value and FLAGS.run_uri: metadata['pkb_log_path'] = log_util.GetLogCloudPath( log_util.PKB_LOG_BUCKET.value, f'{FLAGS.run_uri}-pkb.log' ) if log_util.VM_LOG_BUCKET.value and FLAGS.run_uri: metadata['vm_log_path'] = log_util.GetLogCloudPath( log_util.VM_LOG_BUCKET.value, FLAGS.run_uri ) _PublishEventSample(spec, 'Run Started', metadata) def _PublishEventSample( spec: bm_spec.BenchmarkSpec, event: str, metadata: Dict[str, Any] | None = None, collector: publisher.SampleCollector | None = None, ): """Publishes a sample indicating the progress of the benchmark. Value of sample is time of event in unix seconds Args: spec: The BenchmarkSpec object with run information. event: The progress event to publish. metadata: optional metadata to publish about the event. collector: the SampleCollector to use. """ # N.B. SampleCollector seems stateless so re-using vs creating a new one seems # to have no effect. if not collector: collector = publisher.SampleCollector() collector.AddSamples( [sample.Sample(event, time.time(), 'seconds', metadata or {})], spec.name, spec, ) collector.PublishSamples() def _IsException(e: Exception, exception_class: Type[Exception]) -> bool: """Checks if the exception is of the class or contains the class name. When exceptions happen on on background theads (e.g. CreationInternalError on CreateAndBootVm) they are not propogated as exceptions to the caller, instead they are propagated as text inside a wrapper exception such as errors.VmUtil.ThreadException. Args: e: The exception instance to inspect. exception_class: The exception class to check if e is an instance of. Returns: true if the exception is of the class or contains the class name. """ if isinstance(e, exception_class): return True if str(exception_class.__name__) in str(e): return True return False def RunBenchmark( spec: bm_spec.BenchmarkSpec, collector: publisher.SampleCollector ): """Runs a single benchmark and adds the results to the collector. Args: spec: The BenchmarkSpec object with run information. collector: The SampleCollector object to add samples to. """ # Since there are issues with the handling SIGINT/KeyboardInterrupt (see # further discussion in _BackgroundProcessTaskManager) this mechanism is # provided for defense in depth to force skip pending runs after SIGINT. for f in _SKIP_PENDING_RUNS_CHECKS: if f(): logging.warning('Skipping benchmark.') return spec.status = benchmark_status.FAILED current_run_stage = stages.PROVISION # If the skip_teardown_conditions flag is set, we will check the samples # collected before the teardown phase to determine if we should skip teardown. 
should_teardown = True # Modify the logger prompt for messages logged within this function. label_extension = '{}({}/{})'.format( spec.name, spec.sequence_number, spec.total_benchmarks ) context.SetThreadBenchmarkSpec(spec) log_context = log_util.GetThreadLogContext() with log_context.ExtendLabel(label_extension): with spec.RedirectGlobalFlags(): end_to_end_timer = timing_util.IntervalTimer() detailed_timer = timing_util.IntervalTimer() interrupt_checker = None try: with end_to_end_timer.Measure('End to End'): _SetRestoreSpec(spec) _SetFreezePath(spec) if stages.PROVISION in FLAGS.run_stage: DoProvisionPhase(spec, detailed_timer) if stages.PREPARE in FLAGS.run_stage: current_run_stage = stages.PREPARE interrupt_checker = InterruptChecker(spec.vms) DoPreparePhase(spec, detailed_timer) interrupt_checker.EndCheckInterruptThreadAndRaiseError() interrupt_checker = None if stages.RUN in FLAGS.run_stage: current_run_stage = stages.RUN interrupt_checker = InterruptChecker(spec.vms) DoRunPhase(spec, collector, detailed_timer) interrupt_checker.EndCheckInterruptThreadAndRaiseError() interrupt_checker = None if stages.CLEANUP in FLAGS.run_stage: current_run_stage = stages.CLEANUP interrupt_checker = InterruptChecker(spec.vms) DoCleanupPhase(spec, detailed_timer) interrupt_checker.EndCheckInterruptThreadAndRaiseError() interrupt_checker = None if stages.TEARDOWN in FLAGS.run_stage: CaptureVMLogs(spec.vms) skip_teardown_conditions = ParseSkipTeardownConditions( pkb_flags.SKIP_TEARDOWN_CONDITIONS.value ) should_teardown = ShouldTeardown( skip_teardown_conditions, collector.published_samples + collector.samples, spec.vms, pkb_flags.SKIP_TEARDOWN_ZONAL_VM_LIMIT.value, pkb_flags.SKIP_TEARDOWN_ON_COMMAND_TIMEOUT.value, ) if should_teardown: current_run_stage = stages.TEARDOWN DoTeardownPhase(spec, collector, detailed_timer) else: for vm in spec.vms: vm.UpdateTimeoutMetadata() # Add timing samples. if ( FLAGS.run_stage == stages.STAGES and timing_util.EndToEndRuntimeMeasurementEnabled() ): collector.AddSamples( end_to_end_timer.GenerateSamples(), spec.name, spec ) if timing_util.RuntimeMeasurementsEnabled(): collector.AddSamples( detailed_timer.GenerateSamples(), spec.name, spec ) # Add resource related samples. collector.AddSamples(spec.GetSamples(), spec.name, spec) # except block will clean up benchmark specific resources on exception. It # may also clean up generic resources based on # FLAGS.always_teardown_on_exception. 
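      # Example (hypothetical invocation): a provision-only run that should
      # still delete whatever it managed to create if provisioning fails:
      #
      #   ./pkb.py --benchmarks=iperf --run_stage=provision \
      #       --always_teardown_on_exception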
except (Exception, KeyboardInterrupt) as e: # Log specific type of failure, if known # TODO(dlott) Move to exception chaining with Python3 support if _IsException(e, errors.Benchmarks.InsufficientCapacityCloudFailure): spec.failed_substatus = ( benchmark_status.FailedSubstatus.INSUFFICIENT_CAPACITY ) elif _IsException(e, errors.Benchmarks.QuotaFailure): spec.failed_substatus = benchmark_status.FailedSubstatus.QUOTA elif ( _IsException(e, errors.Benchmarks.KnownIntermittentError) or _IsException(e, errors.Resource.CreationInternalError) or _IsException(e, errors.Resource.ProvisionTimeoutError) ): spec.failed_substatus = ( benchmark_status.FailedSubstatus.KNOWN_INTERMITTENT ) elif _IsException(e, errors.Benchmarks.UnsupportedConfigError): spec.failed_substatus = benchmark_status.FailedSubstatus.UNSUPPORTED elif _IsException(e, errors.Resource.RestoreError): spec.failed_substatus = ( benchmark_status.FailedSubstatus.RESTORE_FAILED ) elif _IsException(e, errors.Resource.FreezeError): spec.failed_substatus = benchmark_status.FailedSubstatus.FREEZE_FAILED elif isinstance(e, KeyboardInterrupt): spec.failed_substatus = ( benchmark_status.FailedSubstatus.PROCESS_KILLED ) elif _IsException(e, vm_util.TimeoutExceededRetryError): spec.failed_substatus = ( benchmark_status.FailedSubstatus.COMMAND_TIMEOUT ) elif _IsException(e, vm_util.RetriesExceededRetryError): spec.failed_substatus = ( benchmark_status.FailedSubstatus.RETRIES_EXCEEDED ) elif _IsException(e, errors.Config.InvalidValue): spec.failed_substatus = benchmark_status.FailedSubstatus.INVALID_VALUE elif _IsException(e, vm_util.ImageNotFoundError): spec.failed_substatus = benchmark_status.FailedSubstatus.UNSUPPORTED else: spec.failed_substatus = benchmark_status.FailedSubstatus.UNCATEGORIZED spec.status_detail = str(e) # Resource cleanup (below) can take a long time. Log the error to give # immediate feedback, then re-throw. logging.exception('Error during benchmark %s', spec.name) if FLAGS.create_failed_run_samples: PublishFailedRunSample(spec, str(e), current_run_stage, collector) # If the particular benchmark requests us to always call cleanup, do it # here. if stages.CLEANUP in FLAGS.run_stage and spec.always_call_cleanup: DoCleanupPhase(spec, detailed_timer) if ( FLAGS.always_teardown_on_exception and stages.TEARDOWN not in FLAGS.run_stage ): # Note that if TEARDOWN is specified, it will happen below. DoTeardownPhase(spec, collector, detailed_timer) raise # finally block will only clean up generic resources if teardown is # included in FLAGS.run_stage. finally: if interrupt_checker: interrupt_checker.EndCheckInterruptThread() # Deleting resources should happen first so any errors with publishing # don't prevent teardown. if stages.TEARDOWN in FLAGS.run_stage and should_teardown: spec.Delete() if FLAGS.publish_after_run: collector.PublishSamples() events.benchmark_end.send(benchmark_spec=spec) # Pickle spec to save final resource state. spec.Pickle() spec.status = benchmark_status.SUCCEEDED def PublishFailedRunSample( spec: bm_spec.BenchmarkSpec, error_message: str, run_stage_that_failed: str, collector: publisher.SampleCollector, ): """Publish a sample.Sample representing a failed run stage. The sample metric will have the name 'Run Failed'; the value will be the timestamp in Unix Seconds, and the unit will be 'seconds'. The sample metadata will include the error message from the Exception, the run stage that failed, as well as all PKB command line flags that were passed in. 
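  For example, a quota failure during provisioning might be published roughly
  as (values illustrative):

    metric='Run Failed', value=<unix timestamp>, unit='seconds',
    metadata={'error_message': 'QuotaFailure: ...',
              'run_stage': 'provision',
              'flags': "{'cloud': 'GCP', ...}"}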
Args: spec: benchmark_spec error_message: error message that was caught, resulting in the run stage failure. run_stage_that_failed: run stage that failed by raising an Exception collector: the collector to publish to. """ # Note: currently all provided PKB command line flags are included in the # metadata. We may want to only include flags specific to the benchmark that # failed. This can be acomplished using gflag's FlagsByModuleDict(). metadata = { 'error_message': error_message[0 : FLAGS.failed_run_samples_error_length], 'run_stage': run_stage_that_failed, 'flags': str(flag_util.GetProvidedCommandLineFlags()), } background_tasks.RunThreaded( lambda vm: vm.UpdateInterruptibleVmStatus(use_api=True), spec.vms ) interruptible_vm_count = 0 interrupted_vm_count = 0 vm_status_codes = [] for vm in spec.vms: if vm.IsInterruptible(): interruptible_vm_count += 1 if vm.WasInterrupted(): interrupted_vm_count += 1 spec.failed_substatus = benchmark_status.FailedSubstatus.INTERRUPTED status_code = vm.GetVmStatusCode() if status_code: vm_status_codes.append(status_code) if spec.failed_substatus: metadata['failed_substatus'] = spec.failed_substatus if interruptible_vm_count: metadata.update({ 'interruptible_vms': interruptible_vm_count, 'interrupted_vms': interrupted_vm_count, 'vm_status_codes': vm_status_codes, }) if interrupted_vm_count: logging.error( '%d interruptible VMs were interrupted in this failed PKB run.', interrupted_vm_count, ) _PublishEventSample(spec, 'Run Failed', metadata, collector) def _ShouldRetry(spec: bm_spec.BenchmarkSpec) -> bool: """Returns whether the benchmark run should be retried.""" return ( spec.status == benchmark_status.FAILED and spec.failed_substatus in _RETRY_SUBSTATUSES.value ) def _GetMachineTypes(spec: bm_spec.BenchmarkSpec) -> list[str]: """Returns a deduped list of machine types to provision for the given spec.""" if FLAGS.machine_type: return [FLAGS.machine_type] results = set() for vm_group_spec in spec.vms_to_boot.values(): results.add(vm_group_spec.vm_spec.machine_type) return sorted(list(results)) def RunBenchmarkTask( spec: bm_spec.BenchmarkSpec, ) -> Tuple[Sequence[bm_spec.BenchmarkSpec], List[sample.SampleDict]]: """Task that executes RunBenchmark. This is designed to be used with RunParallelProcesses. Note that for retries only the last run has its samples published. Arguments: spec: BenchmarkSpec. The spec to call RunBenchmark with. Returns: A BenchmarkSpec for each run iteration and a list of samples from the last run. """ # Many providers name resources using run_uris. When running multiple # benchmarks in parallel, this causes name collisions on resources. # By modifying the run_uri, we avoid the collisions. if FLAGS.run_processes and FLAGS.run_processes > 1: spec.config.flags['run_uri'] = FLAGS.run_uri + str(spec.sequence_number) # Unset run_uri so the config value takes precedence. FLAGS['run_uri'].present = 0 zone_retry_manager = ZoneRetryManager(_GetMachineTypes(spec)) # Set the run count. max_run_count = 1 + pkb_flags.MAX_RETRIES.value # Useful format string for debugging. benchmark_info = ( f'{spec.sequence_number}/{spec.total_benchmarks} ' f'{spec.name} (UID: {spec.uid})' ) result_specs = [] for current_run_count in range(max_run_count): # Attempt to return the most recent results. 
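    # Illustrative behavior: invoking PKB with, e.g.,
    #   ./pkb.py --benchmarks=iperf --retries=2
    # allows up to three attempts of this loop (the original run plus two
    # retries), each against a fresh deep copy of the benchmark spec.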
if _TEARDOWN_EVENT.is_set(): if result_specs and collector: return result_specs, collector.samples return [spec], [] run_start_msg = ( '\n' + '-' * 85 + '\n' + 'Starting benchmark %s attempt %s of %s' + '\n' + '-' * 85 ) logging.info( run_start_msg, benchmark_info, current_run_count + 1, max_run_count ) collector = publisher.SampleCollector() # Make a new copy of the benchmark_spec for each run since currently a # benchmark spec isn't compatible with multiple runs. In particular, the # benchmark_spec doesn't correctly allow for a provision of resources # after tearing down. spec_for_run = copy.deepcopy(spec) result_specs.append(spec_for_run) try: RunBenchmark(spec_for_run, collector) except BaseException as e: # pylint: disable=broad-except logging.exception('Exception running benchmark') msg = f'Benchmark {benchmark_info} failed.' if isinstance(e, KeyboardInterrupt) or FLAGS.stop_after_benchmark_failure: logging.error('%s Execution will not continue.', msg) _TEARDOWN_EVENT.set() break logging.error('%s Execution will continue.', msg) # Don't retry on the last run. if _ShouldRetry(spec_for_run) and current_run_count != max_run_count - 1: logging.info( 'Benchmark should be retried. Waiting %s seconds before running.', pkb_flags.RETRY_DELAY_SECONDS.value, ) time.sleep(pkb_flags.RETRY_DELAY_SECONDS.value) # Handle smart retries if specified. zone_retry_manager.HandleSmartRetries(spec_for_run) else: logging.info( 'Benchmark should not be retried. Finished %s runs of %s', current_run_count + 1, max_run_count, ) break # We need to return both the spec and samples so that we know # the status of the test and can publish any samples that # haven't yet been published. return result_specs, collector.samples class ZoneRetryManager: """Encapsulates state and functions for zone retries. Attributes: original_zone: If specified, the original zone provided to the benchmark. zones_tried: Zones that have already been tried in previous runs. """ def __init__(self, machine_types: Collection[str]): self._CheckFlag(machine_types) if ( not pkb_flags.SMART_CAPACITY_RETRY.value and not pkb_flags.SMART_QUOTA_RETRY.value ): return self._machine_types = list(machine_types) self._zones_tried: Set[str] = set() self._regions_tried: Set[str] = set() self._utils: types.ModuleType = providers.LoadProviderUtils(FLAGS.cloud) self._SetOriginalZoneAndFlag() def _CheckMachineTypesAreSpecified( self, machine_types: Collection[str] ) -> None: if not machine_types: raise errors.Config.MissingOption( 'machine_type flag must be specified on the command line ' 'if zone=any feature is used.' ) def _GetCurrentZoneFlag(self): return FLAGS[self._zone_flag].value[0] def _CheckFlag(self, machine_types: Collection[str]) -> None: for zone_flag in ['zone', 'zones']: if FLAGS[zone_flag].value: self._zone_flag = zone_flag if self._GetCurrentZoneFlag() == _ANY_ZONE: self._CheckMachineTypesAreSpecified(machine_types) FLAGS['smart_capacity_retry'].parse(True) FLAGS['smart_quota_retry'].parse(True) def _SetOriginalZoneAndFlag(self) -> None: """Records the flag name and zone value that the benchmark started with.""" # This is guaranteed to set values due to flag validator. 
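    # Example (machine type is illustrative): with --zone=any and
    # --machine_type=n2-standard-8, the supported zones below become the
    # intersection of zones offering that machine type; one is picked at
    # random as the starting zone, and --retries is raised to the number of
    # candidate zones if it was left at its default.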
self._supported_zones = self._utils.GetZonesFromMachineType( self._machine_types[0] ) for machine_type in self._machine_types[1:]: self._supported_zones.intersection_update( self._utils.GetZonesFromMachineType(machine_type) ) if self._GetCurrentZoneFlag() == _ANY_ZONE: if pkb_flags.MAX_RETRIES.value < 1: FLAGS['retries'].parse(len(self._supported_zones)) self._ChooseAndSetNewZone(self._supported_zones) self._original_zone = self._GetCurrentZoneFlag() self._original_region = self._utils.GetRegionFromZone(self._original_zone) def HandleSmartRetries(self, spec: bm_spec.BenchmarkSpec) -> None: """Handles smart zone retry flags if provided. If quota retry, pick zone in new region. If unsupported or stockout retries, pick zone in same region. Args: spec: benchmark spec. """ if ( pkb_flags.SMART_QUOTA_RETRY.value and spec.failed_substatus == benchmark_status.FailedSubstatus.QUOTA ): self._AssignZoneToNewRegion() elif pkb_flags.SMART_CAPACITY_RETRY.value and spec.failed_substatus in { benchmark_status.FailedSubstatus.UNSUPPORTED, benchmark_status.FailedSubstatus.INSUFFICIENT_CAPACITY, }: self._AssignZoneToSameRegion() def _AssignZoneToNewRegion(self) -> None: """Changes zone to be a new zone in the different region.""" region = self._utils.GetRegionFromZone(self._GetCurrentZoneFlag()) self._regions_tried.add(region) regions_to_try = ( { self._utils.GetRegionFromZone(zone) for zone in self._supported_zones } - self._regions_tried ) # Restart from empty if we've exhausted all alternatives. if not regions_to_try: self._regions_tried.clear() new_region = self._original_region else: new_region = random.choice(tuple(regions_to_try)) logging.info('Retry using new region %s', new_region) self._ChooseAndSetNewZone(self._utils.GetZonesInRegion(new_region)) def _AssignZoneToSameRegion(self) -> None: """Changes zone to be a new zone in the same region.""" supported_zones_in_region = self._utils.GetZonesInRegion( self._original_region ).intersection(self._supported_zones) self._ChooseAndSetNewZone(supported_zones_in_region) def _ChooseAndSetNewZone(self, possible_zones: Set[str]) -> None: """Saves the current _zone_flag and sets it to a new zone. Args: possible_zones: The set of zones to choose from. """ current_zone = self._GetCurrentZoneFlag() if current_zone != _ANY_ZONE: self._zones_tried.add(current_zone) zones_to_try = possible_zones - self._zones_tried # Restart from empty if we've exhausted all alternatives. if not zones_to_try: self._zones_tried.clear() new_zone = self._original_zone else: new_zone = random.choice(tuple(zones_to_try)) logging.info('Retry using new zone %s', new_zone) FLAGS[self._zone_flag].unparse() FLAGS[self._zone_flag].parse([new_zone]) def _LogCommandLineFlags(): result = [] for name in FLAGS: flag = FLAGS[name] if flag.present: result.append(flag.serialize()) logging.info('Flag values:\n%s', '\n'.join(result)) def SetUpPKB(): """Set globals and environment variables for PKB. After SetUpPKB() returns, it should be possible to call PKB functions, like benchmark_spec.Prepare() or benchmark_spec.Run(). SetUpPKB() also modifies the local file system by creating a temp directory and storing new SSH keys. """ try: _InitializeRunUri() except errors.Error as e: logging.error(e) sys.exit(1) # Initialize logging. 
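  # With the defaults, the file log for a run typically ends up inside the
  # per-run temp directory, e.g. (exact path depends on vm_util/log_util
  # settings): /tmp/perfkitbenchmarker/runs/<run_uri>/pkb.log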
vm_util.GenTempDir() if FLAGS.use_pkb_logging: log_util.ConfigureLogging( stderr_log_level=log_util.LOG_LEVELS[FLAGS.log_level], log_path=vm_util.PrependTempDir(log_util.LOG_FILE_NAME), run_uri=FLAGS.run_uri, file_log_level=log_util.LOG_LEVELS[FLAGS.file_log_level], ) logging.info('PerfKitBenchmarker version: %s', version.VERSION) # Log all provided flag values. _LogCommandLineFlags() # Register skip pending runs functionality. RegisterSkipPendingRunsCheck(_SkipPendingRunsFile) # Check environment. if not FLAGS.ignore_package_requirements: requirements.CheckBasicRequirements() for executable in REQUIRED_EXECUTABLES: if not vm_util.ExecutableOnPath(executable): raise errors.Setup.MissingExecutableError( 'Could not find required executable "%s"' % executable ) # Check mutually exclusive flags if FLAGS.run_stage_iterations > 1 and FLAGS.run_stage_time > 0: raise errors.Setup.InvalidFlagConfigurationError( 'Flags run_stage_iterations and run_stage_time are mutually exclusive' ) vm_util.SSHKeyGen() if FLAGS.static_vm_file: with open(FLAGS.static_vm_file) as fp: static_virtual_machine.StaticVirtualMachine.ReadStaticVirtualMachineFile( fp ) benchmark_lookup.SetBenchmarkModuleFunction(benchmark_sets.BenchmarkModule) package_lookup.SetPackageModuleFunction(benchmark_sets.PackageModule) # Update max_concurrent_threads to use at least as many threads as VMs. This # is important for the cluster_boot benchmark where we want to launch the VMs # in parallel. if not FLAGS.max_concurrent_threads: FLAGS.max_concurrent_threads = max( background_tasks.MAX_CONCURRENT_THREADS, FLAGS.num_vms ) logging.info( 'Setting --max_concurrent_threads=%d.', FLAGS.max_concurrent_threads ) def RunBenchmarkTasksInSeries(tasks): """Runs benchmarks in series. Arguments: tasks: list of tuples of task: [(RunBenchmarkTask, (spec,), {}),] Returns: list of tuples of func results """ return [func(*args, **kwargs) for func, args, kwargs in tasks] def RunBenchmarks(): """Runs all benchmarks in PerfKitBenchmarker. Returns: Exit status for the process. """ benchmark_specs = _CreateBenchmarkSpecs() if FLAGS.randomize_run_order: random.shuffle(benchmark_specs) if FLAGS.dry_run: print('PKB will run with the following configurations:') for spec in benchmark_specs: print(spec) print('') return 0 benchmark_spec_lists = None collector = publisher.SampleCollector() try: tasks = [(RunBenchmarkTask, (spec,), {}) for spec in benchmark_specs] if FLAGS.run_processes is None: spec_sample_tuples = RunBenchmarkTasksInSeries(tasks) else: spec_sample_tuples = background_tasks.RunParallelProcesses( tasks, FLAGS.run_processes, FLAGS.run_processes_delay ) benchmark_spec_lists, sample_lists = list(zip(*spec_sample_tuples)) for sample_list in sample_lists: collector.samples.extend(sample_list) finally: if collector.samples: collector.PublishSamples() # Use the last run in the series of runs. 
if benchmark_spec_lists: benchmark_specs = [spec_list[-1] for spec_list in benchmark_spec_lists] if benchmark_specs: logging.info(benchmark_status.CreateSummary(benchmark_specs)) logging.info('Complete logs can be found at: %s', log_util.log_local_path) logging.info( 'Completion statuses can be found at: %s', vm_util.PrependTempDir(COMPLETION_STATUS_FILE_NAME), ) if stages.TEARDOWN not in FLAGS.run_stage: logging.info( 'To run again with this setup, please use --run_uri=%s', FLAGS.run_uri ) if FLAGS.archive_bucket: archive.ArchiveRun( vm_util.GetTempDir(), FLAGS.archive_bucket, gsutil_path=FLAGS.gsutil_path, prefix=FLAGS.run_uri + '_', ) # Write completion status file(s) if FLAGS.completion_status_file: with open(FLAGS.completion_status_file, 'w') as status_file: _WriteCompletionStatusFile(benchmark_specs, status_file) completion_status_file_name = vm_util.PrependTempDir( COMPLETION_STATUS_FILE_NAME ) with open(completion_status_file_name, 'w') as status_file: _WriteCompletionStatusFile(benchmark_specs, status_file) # Upload PKB logs to GCS after all benchmark runs are complete. log_util.CollectPKBLogs(run_uri=FLAGS.run_uri) all_benchmarks_succeeded = all( spec.status == benchmark_status.SUCCEEDED for spec in benchmark_specs ) return_code = 0 if all_benchmarks_succeeded else 1 logging.info('PKB exiting with return_code %s', return_code) return return_code def _GenerateBenchmarkDocumentation(): """Generates benchmark documentation to show in --help.""" benchmark_docs = [] for benchmark_module in ( linux_benchmarks.BENCHMARKS + windows_benchmarks.BENCHMARKS ): benchmark_config = configs.LoadMinimalConfig( benchmark_module.BENCHMARK_CONFIG, benchmark_module.BENCHMARK_NAME ) vm_groups = benchmark_config.get('vm_groups', {}) total_vm_count = 0 vm_str = '' scratch_disk_str = '' for group in vm_groups.values(): group_vm_count = group.get('vm_count', 1) if group_vm_count is None: vm_str = 'variable' else: total_vm_count += group_vm_count if group.get('disk_spec'): scratch_disk_str = ' with scratch volume(s)' name = benchmark_module.BENCHMARK_NAME if benchmark_module in windows_benchmarks.BENCHMARKS: name += ' (Windows)' benchmark_docs.append( '%s: %s (%s VMs%s)' % ( name, benchmark_config['description'], vm_str or total_vm_count, scratch_disk_str, ) ) return '\n\t'.join(benchmark_docs) def _CreateCpuVulnerabilitySamples(vms) -> List[sample.Sample]: """Returns samples of the VMs' CPU vulernabilites.""" def CreateSample(vm) -> sample.Sample | None: metadata = {'vm_name': vm.name} metadata.update(vm.cpu_vulnerabilities.asdict) return sample.Sample('cpu_vuln', 0, '', metadata) linux_vms = [vm for vm in vms if vm.OS_TYPE in os_types.LINUX_OS_TYPES] return background_tasks.RunThreaded(CreateSample, linux_vms) def _CreateGccSamples(vms): """Creates samples from linux VMs of gcc version output.""" def _GetGccMetadata(vm): return { 'name': vm.name, 'versiondump': build_tools.GetVersion(vm, 'gcc'), 'versioninfo': build_tools.GetVersionInfo(vm, 'gcc'), } linux_vms = [vm for vm in vms if vm.OS_TYPE in os_types.LINUX_OS_TYPES] return [ sample.Sample('gcc_version', 0, '', metadata) for metadata in background_tasks.RunThreaded(_GetGccMetadata, linux_vms) ] def _CreateGlibcSamples(vms): """Creates glibc samples from linux VMs of ldd output.""" def _GetGlibcVersionInfo(vm): out, _ = vm.RemoteCommand('ldd --version', ignore_failure=True) # return first line return out.splitlines()[0] if out else None def _GetGlibcMetadata(vm): return { 'name': vm.name, # TODO(user): Add glibc versiondump. 
'versioninfo': _GetGlibcVersionInfo(vm), } linux_vms = [vm for vm in vms if vm.OS_TYPE in os_types.LINUX_OS_TYPES] return [ sample.Sample('glibc_version', 0, '', metadata) for metadata in background_tasks.RunThreaded(_GetGlibcMetadata, linux_vms) ] def _ParseMeminfo(meminfo_txt: str) -> Tuple[Dict[str, int], List[str]]: """Returns the parsed /proc/meminfo data. Response has entries such as {'MemTotal' : 32887056, 'Inactive': 4576524}. If the /proc/meminfo entry has two values such as MemTotal: 32887056 kB checks that the last value is 'kB' If it is not then adds that line to the 2nd value in the tuple. Args: meminfo_txt: contents of /proc/meminfo Returns: Tuple where the first entry is a dict of the parsed keys and the second are unparsed lines. """ data: Dict[str, int] = {} malformed: List[str] = [] for line in meminfo_txt.splitlines(): try: key, full_value = re.split(r':\s+', line) parts = full_value.split() if len(parts) == 1 or (len(parts) == 2 and parts[1] == 'kB'): data[key] = int(parts[0]) else: malformed.append(line) except ValueError: # If the line does not match "key: value" or if the value is not an int malformed.append(line) return data, malformed @events.benchmark_samples_created.connect def _CollectMeminfoHandler( unused_sender, benchmark_spec: bm_spec.BenchmarkSpec, samples: List[sample.Sample], ) -> None: """Optionally creates /proc/meminfo samples. If the flag --collect_meminfo is set appends a sample.Sample of /proc/meminfo data for every VM in the run. Parameter names cannot be changed as the method is called by events.send with keyword arguments. Args: benchmark_spec: The benchmark spec. samples: Generated samples that can be appended to. """ if not pkb_flags.COLLECT_MEMINFO.value: return def CollectMeminfo(vm): txt, _ = vm.RemoteCommand('cat /proc/meminfo') meminfo, malformed = _ParseMeminfo(txt) meminfo.update({ 'meminfo_keys': ','.join(sorted(meminfo)), 'meminfo_vmname': vm.name, 'meminfo_machine_type': vm.machine_type, 'meminfo_os_type': vm.OS_TYPE, }) if malformed: meminfo['meminfo_malformed'] = ','.join(sorted(malformed)) return sample.Sample('meminfo', 0, '', meminfo) linux_vms = [ vm for vm in benchmark_spec.vms if vm.OS_TYPE in os_types.LINUX_OS_TYPES ] samples.extend(background_tasks.RunThreaded(CollectMeminfo, linux_vms)) def CaptureVMLogs( vms: List[virtual_machine.BaseVirtualMachine], ) -> None: """Generates and captures VM logs.""" if pkb_flags.CAPTURE_VM_LOGS.value: for vm in vms: vm_log_files = vm.GenerateAndCaptureLogs() logging.info( 'Captured the following logs for VM %s: %s', vm.name, vm_log_files ) for log_path in vm_log_files: log_util.CollectVMLogs(FLAGS.run_uri, log_path) def ParseArgs(): """Parse command line arguments .""" argv = flag_alias.AliasFlagsFromArgs(sys.argv) _ParseFlags(argv) if FLAGS.helpmatch: _PrintHelp(FLAGS.helpmatch) return 0 if FLAGS.helpmatchmd: _PrintHelpMD(FLAGS.helpmatchmd) return 0 if not FLAGS.accept_licenses: logging.warning( 'Please run with the --accept_licenses flag to ' 'acknowledge PKB may install software on your behalf.' ) CheckVersionFlag() SetUpPKB() def Main(): """Entrypoint for PerfKitBenchmarker.""" assert sys.version_info >= (3, 11), 'PerfKitBenchmarker requires Python 3.11+' log_util.ConfigureBasicLogging() _InjectBenchmarkInfoIntoDocumentation() ParseArgs() return RunBenchmarks()
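

# Typical invocations (benchmark names, machine types, and flag values are
# illustrative):
#
#   # Run a single benchmark end to end on the configured cloud.
#   ./pkb.py --benchmarks=iperf --machine_type=n1-standard-4
#
#   # Provision and prepare once, then repeat the run stage against the same
#   # run_uri, and tear everything down when finished.
#   ./pkb.py --benchmarks=iperf --run_stage=provision,prepare
#   ./pkb.py --benchmarks=iperf --run_stage=run --run_uri=<run_uri>
#   ./pkb.py --benchmarks=iperf --run_stage=cleanup,teardown --run_uri=<run_uri>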