perfkitbenchmarker/configs/__init__.py (160 lines of code) (raw):
# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Configuration files for benchmarks.
Each benchmark has a default configuration defined inside its module.
The configuration is written in YAML (www.yaml.org) and specifies what
resources are needed to run the benchmark. Users can write their own
config files, which will be merged with the default configuration. These
config files specify overrides to the default configuration. Users can also
specify which benchmarks to run in the same config file.
Valid top level keys:
benchmarks: A YAML array of dictionaries mapping benchmark names to their
configs. This also determines which benchmarks to run.
flags: A YAML dictionary with overrides for default flag values. Benchmark
config specific flags override those specified here.
*any_benchmark_name*: If the 'benchmarks' key is not specified, then
specifying a benchmark name mapped to a config will override
that benchmark's default configuration in the event that that
benchmark is run.
Valid config keys:
vm_groups: A YAML dictionary mapping the names of VM groups to the groups
themselves. These names can be any string.
description: A description of the benchmark.
flags: A YAML dictionary with overrides for default flag values.
Valid VM group keys:
vm_spec: A YAML dictionary mapping names of clouds (e.g. AWS) to the
actual VM spec.
disk_spec: A YAML dictionary mapping names of clouds to the actual
disk spec.
vm_count: The number of VMs to create in this group. If this key isn't
specified, it defaults to 1.
disk_count: The number of disks to attach to VMs of this group. If this key
isn't specified, it defaults to 1.
cloud: The name of the cloud to create the group in. This is used for
multi-cloud configurations.
os_type: The OS type of the VMs to create (see the flag of the same name for
more information). This is used if you want to run a benchmark using VMs
with different OS types (e.g. Debian and RHEL).
static_vms: A YAML array of Static VM specs. These VMs will be used before
any Cloud VMs are created. The total number of VMs will still add up to
the number specified by the 'vm_count' key.
For valid VM spec keys, see virtual_machine.BaseVmSpec and derived classes.
For valid disk spec keys, see disk.BaseDiskSpec and derived classes.
See configs.spec.BaseSpec for more information about adding additional keys to
VM specs, disk specs, or any component of the benchmark configuration
dictionary.
"""
import copy
import functools
import json
import logging
import re
from absl import flags
import contextlib2
from perfkitbenchmarker import data
from perfkitbenchmarker import errors
import yaml
FLAGS = flags.FLAGS
CONFIG_CONSTANTS = 'default_config_constants.yaml'
DEFAULT_BENCHMARK_CONFIG = 'default_benchmark_config.yaml'
FLAGS_KEY = 'flags'
IMPORT_REGEX = re.compile('^#import (.*)')
flags.DEFINE_string(
'benchmark_config_file',
None,
'The file path to the user config file which will '
'override benchmark defaults. This should either be '
'a path relative to the current working directory, '
'an absolute path, or just the name of a file in the '
'configs/ directory.',
)
flags.DEFINE_multi_string(
'config_override',
None,
'This flag can be used to override any config value. It is applied after '
'the user config (specified via --benchmark_config_file_path), so it has '
'a higher priority than that config. The value of the flag should be '
'fully.qualified.key=value (e.g. --config_override=cluster_boot.vm_groups.'
'default.vm_count=4).',
)
class _ConcatenatedFiles:
"""Class that presents several files as a single object.
The class exposes a single method (read) which is all that yaml
needs to interact with a stream.
Attributes:
files: A list of opened file objects.
current_file_index: The index of the current file that is being read from.
"""
def __init__(self, files):
self.files = files
self.current_file_index = 0
def read(self, length):
data = self.files[self.current_file_index].read(length)
while (not data) and (self.current_file_index + 1 < len(self.files)):
self.current_file_index += 1
data = self.files[self.current_file_index].read(length)
return data
def _GetImportFiles(config_file, imported_set=None):
"""Get a list of file names that get imported from config_file.
Args:
config_file: The name of a config file to find imports for.
imported_set: A set of files that _GetImportFiles has already been called on
that should be ignored.
Returns:
A list of file names that are imported by config_file
(including config_file itself).
"""
imported_set = imported_set or set()
config_path = data.ResourcePath(config_file)
# Give up on circular imports.
if config_path in imported_set:
return []
imported_set.add(config_path)
with open(config_path) as f:
line = f.readline()
match = IMPORT_REGEX.match(line)
import_files = []
while match:
import_file = match.group(1)
for file_name in _GetImportFiles(import_file, imported_set):
if file_name not in import_files:
import_files.append(file_name)
line = f.readline()
match = IMPORT_REGEX.match(line)
import_files.append(config_path)
return import_files
def _LoadDefaultConfig():
"""Loads the default config from the supplied path."""
with open(data.ResourcePath(DEFAULT_BENCHMARK_CONFIG, False)) as fp:
return yaml.safe_load(fp)
def _LoadUserConfig(path):
"""Loads a user config from the supplied path."""
config_files = _GetImportFiles(path)
with contextlib2.ExitStack() as stack:
files = [stack.enter_context(open(f)) for f in config_files]
return yaml.safe_load(_ConcatenatedFiles(files))
@functools.lru_cache()
def _LoadConfigConstants():
"""Reads the config constants file."""
with open(data.ResourcePath(CONFIG_CONSTANTS, False)) as fp:
return fp.read()
def _GetConfigFromOverrides(overrides):
"""Converts a list of overrides into a config."""
config = {}
for override in overrides:
if override.count('=') != 1:
raise ValueError(
'--config_override flag value has incorrect number of '
'"=" characters. The value must take the form '
'fully.qualified.key=value.'
)
full_key, value = override.split('=')
keys = full_key.split('.')
new_config = {keys.pop(): yaml.safe_load(value)}
while keys:
new_config = {keys.pop(): new_config}
config = MergeConfigs(config, new_config)
return config
@functools.lru_cache()
def GetConfigFlags():
"""Returns the global flags from the user config."""
return GetDefaultAndUserConfig().get(FLAGS_KEY, {})
def GetDefaultAndUserConfig():
"""Returns the user config merged with a default config, with any overrides applied.
This loads configs from --benchmark_config_file, merges it with
default_config, which is then merged with any overrides specified via
--config_override and returns the result. These configs are different from the
BENCHMARK_CONFIG in each benchmark file, which is merged in later (during
each benchmark's 'GetConfig' method.)
Returns:
dict. The result of merging the loaded config from the
--benchmark_config_file flag with the config generated from the
--config_override flag.
"""
# Doing a single yaml parse of concatenated files might be faster but merging
# works as well.
default_config = _LoadDefaultConfig()
try:
if FLAGS.benchmark_config_file:
user_config = _LoadUserConfig(FLAGS.benchmark_config_file)
config = MergeConfigs(default_config, user_config)
else:
config = default_config
if FLAGS.config_override:
override_config = _GetConfigFromOverrides(FLAGS.config_override)
config = MergeConfigs(config, override_config)
except yaml.parser.ParserError as e:
raise errors.Config.ParseError(
'Encountered a problem loading config. Please ensure that the config '
'is valid YAML. Error received:\n%s' % e
)
except yaml.composer.ComposerError as e:
raise errors.Config.ParseError(
'Encountered a problem loading config. Please ensure that all '
'references are defined. Error received:\n%s' % e
)
return config
def MergeConfigs(default_config, override_config, warn_new_key=False):
"""Merges the override config into the default config.
This function will recursively merge two nested dicts.
The override_config represents overrides to the default_config dict, so any
leaf key/value pairs which are present in both dicts will take their value
from the override_config.
Args:
default_config: The dict which will have its values overridden.
override_config: The dict wich contains the overrides.
warn_new_key: Determines whether we warn the user if the override config has
a key that the default config did not have.
Returns:
A dict containing the values from the default_config merged with those from
the override_config.
"""
def _Merge(d1, d2):
"""Merge two nested dicts."""
merged_dict = copy.deepcopy(d1)
for k, v in d2.items():
if k not in d1:
merged_dict[k] = copy.deepcopy(v)
if warn_new_key:
logging.warning(
'The key "%s" was not in the default config, '
'but was in user overrides. This may indicate '
'a typo.',
k,
)
elif isinstance(d1[k], dict) and isinstance(v, dict):
merged_dict[k] = _Merge(d1[k], v)
else:
merged_dict[k] = v
return merged_dict
if override_config:
return _Merge(default_config, override_config)
else:
return default_config
def LoadMinimalConfig(benchmark_config, benchmark_name):
"""Loads a benchmark config without using any flags in the process.
This function will prepend configs/default_config_constants.yaml to the
benchmark config prior to loading it. This allows the config to use
references to anchors defined in the constants file.
Args:
benchmark_config: str. The default config in YAML format.
benchmark_name: str. The name of the benchmark.
Returns:
dict. The loaded config.
"""
yaml_config = []
yaml_config.append(_LoadConfigConstants())
yaml_config.append(benchmark_config)
try:
config = yaml.safe_load('\n'.join(yaml_config))
except yaml.parser.ParserError as e:
raise errors.Config.ParseError(
'Encountered a problem loading the default benchmark config. Please '
'ensure that the config is valid YAML. Error received:\n%s' % e
)
except yaml.composer.ComposerError as e:
raise errors.Config.ParseError(
'Encountered a problem loading the default benchmark config. Please '
'ensure that all references are defined. Error received:\n%s' % e
)
config = config[benchmark_name]
# yaml safe_parse parses anchor by reference and return the same
# object when the same anchor is used multiple times.
# Seralize and deserialize to make sure all objects in the dictionary are
# unique.
config = json.loads(json.dumps(config))
return config
def LoadConfig(benchmark_config, user_config, benchmark_name):
"""Loads a benchmark configuration.
This function loads a benchmark's default configuration (in YAML format),
then merges it with any overrides the user provided, and returns the result.
This loaded config is then passed to the benchmark_spec.BenchmarkSpec
constructor in order to create a BenchmarkSpec.
Args:
benchmark_config: str. The default configuration in YAML format.
user_config: dict. The loaded user config for the benchmark.
benchmark_name: str. The name of the benchmark.
Returns:
dict. The loaded config.
"""
config = LoadMinimalConfig(benchmark_config, benchmark_name)
config = MergeConfigs(config, user_config, warn_new_key=True)
return config