# Copyright 2020 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Run large scale boot benchmark for virtual machines.

This benchmark measures the boot time for virtual machines. It is different from
the cluster_boot benchmark because this one scales better and is capable of
measuring boot time for a large number of machines.

The way it works is as follows:
1) benchmark spins up a variable number of launcher server VM(s) (num_vms flag).
2) launcher server VM(s) start up a server that listens for curl requests.
3) launcher server VM(s) record the system time as start time
4) launcher server VM(s) run a script to create N VMs per server.
5) VMs curl the launcher server as soon as they start up.
6) once launcher server VM(s) get a curl request, they use a separate process
   to confirm the connection.
7) launcher server VM(s) record the system time as end time for this VM.
8) launcher server VM(s) report the measurements
9) total provisioning time is that of the slowest VM.
10) VMs have startup scripts to shut themselves down after TIMEOUT seconds.
"""
import logging
import posixpath
import statistics
from absl import flags
from perfkitbenchmarker import background_tasks
from perfkitbenchmarker import configs
from perfkitbenchmarker import data
from perfkitbenchmarker import errors
from perfkitbenchmarker import linux_virtual_machine
from perfkitbenchmarker import os_types
from perfkitbenchmarker import sample
from perfkitbenchmarker import vm_util
from perfkitbenchmarker import windows_virtual_machine
from perfkitbenchmarker.providers.aws import util as aws_util
from perfkitbenchmarker.providers.azure import azure_virtual_machine
from perfkitbenchmarker.providers.gcp import util as gcp_util


# Name of this benchmark; also the top-level key of BENCHMARK_CONFIG and the
# name passed to configs.LoadConfig() in GetConfig().
BENCHMARK_NAME = 'large_scale_boot'
# Default YAML configuration.
# - 'servers' are the launcher VMs. GetConfig() overrides their vm_count with
#   FLAGS.num_vms and their machine_type with FLAGS.launcher_machine_type.
# - 'clients' is a single template VM describing the VMs to boot. GetConfig()
#   overrides its os_type and machine_type from FLAGS.boot_os_type and
#   FLAGS.boot_machine_type, and rejects any vm_count other than 1.
BENCHMARK_CONFIG = """
large_scale_boot:
  description: >
      Create a cluster of launcher servers,
      where each launcher server launches FLAGS.boots_per_launcher machines.
  vm_groups:
    servers:
      vm_spec:
        GCP:
          machine_type: n1-standard-2
          zone: us-central1-a
          boot_disk_type: pd-ssd
        AWS:
          machine_type: m5.large
          zone: us-east-1
        Azure:
          machine_type: Standard_D2_v3
          zone: eastus
          boot_disk_type: StandardSSD_LRS
      vm_count: 1
      os_type: debian11
    clients:
      vm_spec:
        GCP:
          machine_type: n1-standard-2
          boot_disk_type: pd-ssd
        AWS:
          machine_type: m5.large
        Azure:
          machine_type: Standard_D2_v3
          boot_disk_type: StandardSSD_LRS
      os_type: debian11
      vm_count: 1
"""

FLAGS = flags.FLAGS
# Number of VMs each launcher boots. The total number of boots expected by the
# benchmark is FLAGS.num_vms * FLAGS.boots_per_launcher.
flags.DEFINE_integer(
    'boots_per_launcher',
    1,
    'Number of VMs to boot per launcher server VM. Defaults to 1.',
)
# Reject values outside [1, 1000] at flag-parsing time.
flags.register_validator(
    'boots_per_launcher',
    lambda value: 1 <= value <= 1000,
    message=(
        'The number of VMs booted by each launcher '
        'should be between 1 and 1000.'
    ),
)
# OS of the booted VMs; applied to the 'clients' template group in GetConfig().
flags.DEFINE_string(
    'boot_os_type',
    'debian11',
    'OS to boot on the VMs. '
    'Defaults to debian11. OS on launcher server VM is set '
    'using os_type flag.',
)
# Machine type of the booted VMs; applied to the 'clients' template group.
# NOTE: adjacent string literals are concatenated verbatim, so each fragment
# must carry its own separating space.
flags.DEFINE_string(
    'boot_machine_type',
    'n1-standard-2',
    'Machine type to boot '
    'on the VMs. Defaults to n1-standard-2. Set machine type '
    'on launcher server VM with launcher_machine_type flag.',
)
# Machine type of the launcher VMs; applied to the 'servers' group.
flags.DEFINE_string(
    'launcher_machine_type',
    'n1-standard-16',
    'Machine type '
    'of the launcher server VMs. Defaults to n1-standard-16. Set '
    'machine type on boot VMs with boot_machine_type flag.',
)
# Selects which side initiates contact: booted VMs (True) or launcher (False).
flags.DEFINE_boolean(
    'vms_contact_launcher',
    True,
    'Whether launched vms '
    'attempt to contact the launcher before launcher attempts '
    'to connect to them. Default to True.',
)
# Only valid together with vms_contact_launcher=False; CheckPrerequisites()
# rejects the combination vms_contact_launcher=True and use_public_ip=True.
flags.DEFINE_boolean(
    'use_public_ip',
    False,
    'Whether launcher should contact '
    'boot vms using public ip instead of internal ip. Only '
    'applicable for vms_contact_launcher=False mode. '
    'Defaults to False.',
)

# Tag for undefined hostname, should be synced with listener_server.py script.
UNDEFINED_HOSTNAME = 'UNDEFINED'
# Tag for sequential hostname, should be synced with listener_server.py script.
# The two placeholders are filled with the public DNS pattern (or '') and the
# first boot VM id of the launcher; see _GetServerStartCommand().
SEQUENTIAL_IP = 'SEQUENTIAL_IP_{}_{}'
# Remote tmp directory used for this benchmark.
_REMOTE_DIR = vm_util.VM_TMP_DIR
# Boot script to use on the launcher server vms.
_BOOT_SCRIPT = 'boot_script.sh'
# Local boot template to build boot script.
_BOOT_TEMPLATE = 'large_scale_boot/boot_script.sh.jinja2'
# Remote boot script path.
_BOOT_PATH = posixpath.join(_REMOTE_DIR, _BOOT_SCRIPT)
# Status command file name.
_STATUS_SCRIPT = 'vm_status.sh'
# Local status template to build status command.
_STATUS_TEMPLATE = 'large_scale_boot/vm_status.sh.jinja2'
# Remote status command path.
_STATUS_PATH = posixpath.join(_REMOTE_DIR, _STATUS_SCRIPT)
# Python listener server to run on launcher server vms.
_LISTENER_SERVER = 'large_scale_boot/listener_server.py'
# Log for python listener server (relative path on the launcher).
_LISTENER_SERVER_LOG = 'http.log'
# Clean up script to use on the launcher server vms.
_CLEAN_UP_SCRIPT = 'clean_up.sh'
# Local clean up template to build the clean up script.
_CLEAN_UP_TEMPLATE = 'large_scale_boot/clean_up_script.jinja2'
# Remote clean up script path.
_CLEAN_UP_SCRIPT_PATH = posixpath.join(_REMOTE_DIR, _CLEAN_UP_SCRIPT)
# Port where listener server listens for incoming booted vms.
_PORT = 8000
# File to record the start time of the boots using system time in nanoseconds.
_START_TIME_FILE = 'start_time'
# Start time file path.
_START_TIME_FILE_PATH = posixpath.join(_REMOTE_DIR, _START_TIME_FILE)
# File to record the end time of the boots using system time in nanoseconds.
_RESULTS_FILE = 'results'
# Results file path.
_RESULTS_FILE_PATH = posixpath.join(_REMOTE_DIR, _RESULTS_FILE)
# Seconds to wait for vms to boot. Also passed to the boot script template as
# 'timeout' and used as the retry timeout of _WaitForResponses().
_TIMEOUT_SECONDS = 60 * 10
# Seconds to delay between polling for launcher server task complete.
_POLLING_DELAY = 3
# Naming pattern for GCP booted vms.
_BOOT_VM_NAME_PREFIX = 'booter-{launcher_name}'
# Naming pattern for Azure NICs.
_BOOT_NIC_NAME_PREFIX = 'booter-nic-{run_uri}-'
# Number of Azure private ips that are reserved.
_AZURE_RESERVED_IPS = 5
# Status for VM being reachable at an ipaddress from another VM.
STATUS_PASSING = 'Pass'
# Status for VM marked as running by the cloud provider.
STATUS_RUNNING = 'Running'
# sha256sum for preprovisioned service account credentials.
# If not using service account credentials from preprovisioned data bucket,
# use --gcp_service_account_key_file flag to specify the same credentials.
BENCHMARK_DATA = {
    'large-scale-boot-381ea7fa0a7d.json': (
        '22cd2412f38f5b6f1615ae565cd74073deff3f30829769ec66eebb5cf9672329'
    ),
}
# Default linux ssh port.
_SSH_PORT = linux_virtual_machine.DEFAULT_SSH_PORT
# Default windows rdp port.
_RDP_PORT = windows_virtual_machine.RDP_PORT
# Number of nanoseconds in one second.
_NANO = 1000 * 1000 * 1000


def GetAzBootVMStartIdByLauncher(launcher_name):
  """Returns the first Azure boot VM id belonging to the given launcher.

  Ids must be unique across every VM in the resource group because they are
  later turned into private ip addresses. The ids matching Azure's reserved
  addresses and the ids already taken by the launcher VMs are therefore
  skipped.
  E.g. (2 launchers, 2 boots per launcher)
    Azure reserved ip: 10.0.0.0, 10.0.0.1 ... 10.0.0.4
    Launcher VM pkb-{run_uri}-1 (id 5, ip 10.0.0.5): boot vm id 7, boot vm id 8
    Launcher VM pkb-{run_uri}-2 (id 6, ip 10.0.0.6): boot vm id 9, boot vm id 10

  Args:
    launcher_name: indexed launcher name to calculate ids for the VMs it boots.
      The text after the last '-' must parse as a 1-based integer index.

  Returns:
    The integer id of the first VM booted by this launcher.
  """
  _, _, index_suffix = launcher_name.rpartition('-')
  zero_based_index = int(index_suffix) - 1
  # Ids consumed before any boot VM: reserved addresses plus the launchers.
  ids_already_taken = _AZURE_RESERVED_IPS + FLAGS.num_vms
  return ids_already_taken + zero_based_index * FLAGS.boots_per_launcher


def _GetServerStartCommand(client_port, launcher_vm):
  """Builds the shell command that starts the listener server in background.

  Args:
    client_port: port on the booted VMs that is passed to the listener server
      (the caller supplies the ssh port for Linux and the rdp port otherwise).
    launcher_vm: launcher VM the command is built for; its name and zone are
      read.

  Returns:
    A command string that runs listener_server.py with python3, redirects its
    stdout/stderr to the listener log and backgrounds it.
  """
  cloud = FLAGS.cloud
  if cloud == 'AWS' or (cloud == 'GCP' and FLAGS.use_public_ip):
    # No hostname pattern is known up front in these modes (AWS does not have
    # a defined vm name pattern until after the vm is launched).
    vms_name_pattern = UNDEFINED_HOSTNAME
  elif cloud == 'GCP':
    boot_prefix = _BOOT_VM_NAME_PREFIX.format(launcher_name=launcher_vm.name)
    vms_name_pattern = (
        f'{boot_prefix}-VM_ID.{launcher_vm.zone}.c.{FLAGS.project}.internal'
    )
  elif cloud == 'Azure':
    public_dns = (
        f'booter-{FLAGS.run_uri}-VMID.{launcher_vm.zone}.cloudapp.azure.com'
        if FLAGS.use_public_ip
        else ''
    )
    # Azure assigns a sequential ip, so only the first id is needed.
    vms_name_pattern = SEQUENTIAL_IP.format(
        public_dns, GetAzBootVMStartIdByLauncher(launcher_vm.name)
    )

  server_path = posixpath.join(
      _REMOTE_DIR, posixpath.basename(_LISTENER_SERVER)
  )
  # Positional argument order must match what listener_server.py expects.
  return (
      f'python3 {server_path} {launcher_vm.name} {_PORT} {_RESULTS_FILE_PATH} '
      f'{client_port} {FLAGS.vms_contact_launcher} {vms_name_pattern} '
      f'{FLAGS.boots_per_launcher} {FLAGS.use_public_ip} '
      f'> {_LISTENER_SERVER_LOG} 2>&1 &'
  )


def _IsLinux():
  """Tells whether FLAGS.boot_os_type is one of os_types.LINUX_OS_TYPES."""
  boot_os = FLAGS.boot_os_type
  return boot_os in os_types.LINUX_OS_TYPES


class InsufficientBootsError(Exception):
  """Raised while waiting when fewer VMs than expected have reported in."""


def CheckPrerequisites(_):
  """Verifies that the required resources are present and flags are coherent.

  Args:
    _: unused benchmark configuration.

  Raises:
    perfkitbenchmarker.data.ResourceNotFound: On missing resource.
    errors.Benchmarks.PrepareException: On an unsupported flag combination.
  """
  # Every data file that _Install() later renders or pushes to the launchers,
  # including the status template, so a missing file is reported up front.
  for resource in (
      _BOOT_TEMPLATE,
      _STATUS_TEMPLATE,
      _LISTENER_SERVER,
      _CLEAN_UP_TEMPLATE,
  ):
    data.ResourcePath(resource)
  if FLAGS.cloud == 'Azure' and FLAGS.vms_contact_launcher and not _IsLinux():
    raise errors.Benchmarks.PrepareException(
        'Booting Windows VMs on Azure with a start-up script is not supported. '
        'See https://github.com/Azure/azure-powershell/issues/9600.'
    )
  if FLAGS.vms_contact_launcher and FLAGS.use_public_ip:
    raise errors.Benchmarks.PrepareException(
        'After VMs contact launcher server, launcher will check connectivity '
        'of the VMs using the client address of the curl request. This option '
        'is only applicable when launcher makes the initial contact.'
    )


def GetConfig(user_config):
  """Loads the benchmark config and applies this benchmark's flags to it.

  The 'servers' group (launchers) gets its vm_count from FLAGS.num_vms and its
  machine type from FLAGS.launcher_machine_type. The 'clients' group (template
  for the booted VMs) gets its os_type from FLAGS.boot_os_type and its machine
  type from FLAGS.boot_machine_type.

  Args:
    user_config: user supplied configuration (flags and config file)

  Returns:
    loaded benchmark configuration

  Raises:
    errors.Setup.InvalidConfigurationError: if FLAGS.machine_type is set, or if
      the 'clients' template group has a vm_count other than 1.
  """
  config = configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
  launcher_config = config['vm_groups']['servers']
  launcher_config['vm_count'] = FLAGS.num_vms
  launcher_config['vm_spec'][FLAGS.cloud][
      'machine_type'
  ] = FLAGS.launcher_machine_type
  booter_template = config['vm_groups']['clients']
  booter_template['os_type'] = FLAGS.boot_os_type
  booter_template['vm_spec'][FLAGS.cloud][
      'machine_type'
  ] = FLAGS.boot_machine_type
  if FLAGS.machine_type:
    # Adjacent literals are concatenated verbatim; keep the trailing spaces.
    raise errors.Setup.InvalidConfigurationError(
        'Do not set machine type flag as it will override both launcher and '
        'booter machine types. Use launcher_machine_type and boot_machine_type '
        'instead.'
    )
  if booter_template['vm_count'] != 1:
    raise errors.Setup.InvalidConfigurationError(
        'Booter_template is a configuration template VM. '
        'Booter count should be set by number of launchers (FLAGS.num_vms) and '
        'booters per launcher (FLAGS.boots_per_launcher).'
    )
  return config


def _BuildContext(launcher_vm, booter_template_vm):
  """Assembles the Jinja2 variables used to render the launcher scripts.

  Args:
    launcher_vm: launcher VM the scripts are rendered for.
    booter_template_vm: template VM whose attributes (machine type, image,
      disk, network, ...) describe the VMs to boot.

  Returns:
    A dict with the cloud-independent variables plus the variables specific to
    FLAGS.cloud (GCP, AWS or Azure).
  """
  cloud = FLAGS.cloud
  template_vars = dict(
      boot_machine_type=booter_template_vm.machine_type,
      cloud=cloud,
      contact_launcher=FLAGS.vms_contact_launcher,
      launcher_vm_name=launcher_vm.name,
      os_type='linux' if _IsLinux() else 'windows',
      server_ip=launcher_vm.internal_ip,
      server_port=_PORT,
      start_time_file=_START_TIME_FILE_PATH,
      timeout=_TIMEOUT_SECONDS,
      vm_count=FLAGS.boots_per_launcher,
      zone=launcher_vm.zone,
      # Rendered as a prefix: '' when public ips are wanted, 'no-' otherwise.
      use_public_ip='' if FLAGS.use_public_ip else 'no-',
  )

  if cloud == 'GCP':
    template_vars['boot_disk_size'] = (
        booter_template_vm.boot_disk.boot_disk_size
    )
    template_vars['boot_vm_name_prefix'] = _BOOT_VM_NAME_PREFIX.format(
        launcher_name=launcher_vm.name
    )
    template_vars['image_family'] = booter_template_vm.image_family
    template_vars['image_project'] = booter_template_vm.image_project
    template_vars['gcloud_path'] = FLAGS.gcloud_path
    template_vars['project'] = FLAGS.project
    template_vars['tags'] = gcp_util.MakeFormattedDefaultTags()
  elif cloud == 'AWS':
    # Default tags plus a tag carrying the launcher's name.
    instance_tags = aws_util.MakeDefaultTags()
    instance_tags.update(launcher_id=launcher_vm.name)
    template_vars['group_name'] = booter_template_vm.placement_group.name
    template_vars['image'] = booter_template_vm.image
    template_vars['key_name'] = f'perfkit-key-{FLAGS.run_uri}'
    template_vars['region'] = aws_util.GetRegionFromZone(launcher_vm.zone)
    template_vars['subnet_id'] = booter_template_vm.network.subnet.id
    template_vars['tags'] = aws_util.FormatTagSpecifications(
        'instance', instance_tags
    )
  elif cloud == 'Azure':
    # Everything after the first '-' of the launcher name.
    template_vars['boot_vm_name_prefix'] = launcher_vm.name.split('-', 1)[1]
    template_vars['location'] = launcher_vm.region
    template_vars['image'] = booter_template_vm.image
    template_vars['storage_sku'] = booter_template_vm.os_disk.disk_type
    template_vars['resource_group'] = launcher_vm.resource_group.name
    template_vars['nic'] = _BOOT_NIC_NAME_PREFIX.format(run_uri=FLAGS.run_uri)
    template_vars['password'] = booter_template_vm.password
    template_vars['start_id'] = GetAzBootVMStartIdByLauncher(launcher_vm.name)

  return template_vars


def _Install(launcher_vm, booter_template_vm):
  """Sets up one launcher VM: scripts, packages and the listener server.

  Args:
    launcher_vm: launcher VM to set up.
    booter_template_vm: template VM describing the VMs to boot; used to build
      the template context.
  """
  launcher_vm.InstallCli()

  # Render the boot and status scripts on the launcher.
  template_context = _BuildContext(launcher_vm, booter_template_vm)
  for template, remote_path in (
      (_BOOT_TEMPLATE, _BOOT_PATH),
      (_STATUS_TEMPLATE, _STATUS_PATH),
  ):
    launcher_vm.RenderTemplate(
        data.ResourcePath(template), remote_path, template_context
    )

  # Install and start the listener server on the launcher.
  launcher_vm.InstallPackages('netcat')
  launcher_vm.PushDataFile(_LISTENER_SERVER, _REMOTE_DIR)
  if _IsLinux():
    client_port = _SSH_PORT
  else:
    client_port = _RDP_PORT
  launcher_vm.RemoteCommand('touch log')
  start_server_cmd = _GetServerStartCommand(client_port, launcher_vm)
  launcher_vm.RemoteCommand(start_server_cmd)

  # Render the clean up script on the launcher.
  launcher_vm.RenderTemplate(
      data.ResourcePath(_CLEAN_UP_TEMPLATE),
      _CLEAN_UP_SCRIPT_PATH,
      template_context,
  )


def Prepare(benchmark_spec):
  """Prepares the launcher server vm(s).

  Steps:
  1) On GCP, authenticate the service account on every launcher.
  2) Validate that boots_per_launcher is not too high for the launcher size.
  3) On Azure, pre-create one NIC (and optionally one public ip) per VM that
     will be booted.
  4) On every launcher, build the scripts used to kick off the boots and start
     a listening server that waits for booting vms.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Raises:
    errors.Setup.InvalidConfigurationError: if a launcher is asked to boot more
      than 50 VMs per vcpu.
  """
  benchmark_spec.always_call_cleanup = True
  launcher_vms = benchmark_spec.vm_groups['servers']
  booter_template_vm = benchmark_spec.vm_groups['clients'][0]

  # Setup account/IAM credentials/permissions on launcher servers.
  if FLAGS.cloud == 'GCP':
    for vm in launcher_vms:
      gcp_util.AuthenticateServiceAccount(vm, benchmark=BENCHMARK_NAME)

  # Fail early if launched VMs exceed 50 per vcpu: high CPU usage on the
  # launcher can negatively impact measured boot times.
  first_launcher = launcher_vms[0]
  max_boots = first_launcher.num_cpus * 50
  if FLAGS.boots_per_launcher > max_boots:
    raise errors.Setup.InvalidConfigurationError(
        'Each launcher server VM is launching too many VMs. '
        'Increase launcher server VM size or decrease boots_per_launcher. '
        'For a VM with {} CPUs, launch at most {} VMs.'.format(
            first_launcher.num_cpus, max_boots
        )
    )

  if FLAGS.cloud == 'Azure':
    nic_name_prefix = _BOOT_NIC_NAME_PREFIX.format(run_uri=FLAGS.run_uri)
    # Ids below this value belong to reserved addresses and to the launchers.
    first_boot_id = _AZURE_RESERVED_IPS + FLAGS.num_vms
    for boot_id in range(first_boot_id, first_boot_id + _GetExpectedBoots()):
      # The id is spread over the last two octets of the private address.
      octet3, octet4 = divmod(boot_id, 256)
      private_ip = f'10.0.{octet3}.{octet4}'
      public_ip_name = ''
      if FLAGS.use_public_ip:
        public_ip = azure_virtual_machine.AzurePublicIPAddress(
            first_launcher.region,
            first_launcher.availability_zone,
            f'{boot_id}-public-ip',
            f'booter-{FLAGS.run_uri}-{boot_id}',
        )
        public_ip.Create()
        public_ip_name = public_ip.name
      nic = azure_virtual_machine.AzureNIC(
          subnet=first_launcher.network.subnet,
          name=nic_name_prefix + str(boot_id),
          public_ip=public_ip_name,
          accelerated_networking=False,
          network_security_group=None,
          private_ip=private_ip,
      )
      nic.Create()

  def _InstallOnLauncher(vm):
    return _Install(vm, booter_template_vm)

  background_tasks.RunThreaded(_InstallOnLauncher, launcher_vms)


def _GetExpectedBoots():
  """Returns the total boots expected: launchers times boots per launcher."""
  launcher_count = FLAGS.num_vms
  return launcher_count * FLAGS.boots_per_launcher


def _ReportRunningStatus():
  """Returns whether benchmark will report time till 'Running' status.

  That is only the case when the launcher initiates contact
  (vms_contact_launcher is off) and each launcher boots exactly one VM.
  """
  if FLAGS.vms_contact_launcher:
    return False
  return FLAGS.boots_per_launcher == 1


@vm_util.Retry(
    poll_interval=_POLLING_DELAY,
    timeout=_TIMEOUT_SECONDS,
    retryable_exceptions=InsufficientBootsError,
)
def _WaitForResponses(launcher_vms):
  """Checks once whether every expected VM has reported to its launcher.

  The vm_util.Retry decorator is configured with InsufficientBootsError as the
  retryable exception, _POLLING_DELAY as poll interval and _TIMEOUT_SECONDS as
  timeout; presumably it re-invokes this check until it passes or the timeout
  elapses (confirm against vm_util.Retry).

  Args:
    launcher_vms: Launcher server VMs.

  Raises:
    errors.Benchmarks.RunError: if any listener server log contains 'ERROR'.
    InsufficientBootsError: if the number of passing (and, when tracked,
      running) VMs does not equal the expected number of boots.
  """

  def _GrepListenerErrors(vm):
    stdout, _ = vm.RemoteCommand(
        f'grep ERROR {_LISTENER_SERVER_LOG}', ignore_failure=True
    )
    return stdout

  def _CountState(vm, state):
    stdout, _ = vm.RemoteCommand(
        f'grep -c {state} {_RESULTS_FILE_PATH}', ignore_failure=True
    )
    try:
      return int(stdout)
    except ValueError:
      # Output was not a number; -1 flags that in the logs.
      return -1

  def _CountAcrossLaunchers(state, log_format):
    counts = background_tasks.RunThreaded(
        lambda vm: _CountState(vm, state), launcher_vms
    )
    for vm, count in zip(launcher_vms, counts):
      logging.info(log_format, vm.internal_ip, count, FLAGS.boots_per_launcher)
    return sum(counts)

  # If any listener server logged an error, stop waiting.
  listener_errors = background_tasks.RunThreaded(
      _GrepListenerErrors, launcher_vms
  )
  if any(listener_errors):
    raise errors.Benchmarks.RunError(
        'Some listening server errored out: %s' % listener_errors
    )

  expected_boots = _GetExpectedBoots()
  track_running = _ReportRunningStatus()

  reporting_vms_count = _CountAcrossLaunchers(
      STATUS_PASSING, 'Launcher %s reported %d/%d booted VMs'
  )
  total_running_count = 0
  if track_running:
    total_running_count = _CountAcrossLaunchers(
        STATUS_RUNNING, 'Launcher %s reported %d/%d running VMs'
    )

  all_booted = reporting_vms_count == expected_boots
  all_running = not track_running or total_running_count == expected_boots
  if not (all_booted and all_running):
    raise InsufficientBootsError(
        'Launcher vms reported %d total boots. Expecting %d.'
        % (reporting_vms_count, expected_boots)
    )


def _ParseResult(launcher_vms):
  """Parse the results on the launcher VMs and send it back.

  Each launcher provides a start time file (a single integer) and a results
  file with one 'state:<ignored>:end_time' line per event. Durations are
  end_time - start_time of the launcher that recorded the event.

  Cluster-level samples (max, mean, median boot time and max time to running)
  are aggregated over ALL launchers. A value of -1 is reported for a
  cluster-level time when no matching event was recorded at all.

  Args:
    launcher_vms: Launcher server VMs.

  Returns:
    A list of benchmark samples: one 'Launcher Boot Details' sample per
    launcher followed by the cluster-level samples.
  """
  get_starttime_cmd = 'cat {startime}'.format(startime=_START_TIME_FILE_PATH)
  get_results_cmd = 'cat {results}'.format(results=_RESULTS_FILE_PATH)
  samples = []
  common_metadata = {
      'cloud': FLAGS.cloud,
      'num_launchers': FLAGS.num_vms,
      'expected_boots_per_launcher': FLAGS.boots_per_launcher,
      'boot_os_type': FLAGS.boot_os_type,
      'boot_machine_type': FLAGS.boot_machine_type,
      'launcher_machine_type': FLAGS.launcher_machine_type,
      'vms_contact_launcher': FLAGS.vms_contact_launcher,
      'use_public_ip': FLAGS.use_public_ip,
  }

  # Cluster-wide accumulators; they must outlive the per-launcher loop so that
  # the cluster statistics cover every launcher, not just the last one.
  all_durations = []
  time_to_running = -1

  for vm in launcher_vms:
    start_time_str, _ = vm.RemoteCommand(get_starttime_cmd)
    start_time = int(start_time_str)
    results, _ = vm.RemoteCommand(get_results_cmd)
    launcher_durations = []
    cur_launcher_closed_incoming = 0
    for line in results.splitlines():
      state, _, end_time_str = line.split(':')
      elapsed = int(end_time_str) - start_time
      if state == STATUS_PASSING:
        launcher_durations.append(elapsed)
      elif state == STATUS_RUNNING:
        time_to_running = max(time_to_running, elapsed)
      elif state == 'Fail':
        # outgoing port was open but incoming port was closed.
        cur_launcher_closed_incoming += 1

    all_durations.extend(launcher_durations)
    current_metadata = {
        'zone': vm.zone,
        'launcher_successes': len(launcher_durations),
        'launcher_boot_durations_ns': launcher_durations,
        'launcher_closed_incoming': cur_launcher_closed_incoming,
    }
    current_metadata.update(common_metadata)
    samples.append(
        sample.Sample('Launcher Boot Details', -1, '', current_metadata)
    )

  vm_count = len(all_durations)
  slowest_time = max(all_durations, default=-1)
  if all_durations:
    mean_time = statistics.mean(all_durations)
    median_time = statistics.median(all_durations)
  else:
    # statistics.mean/median raise on empty data; use the same -1 sentinel as
    # the max boot time so a run without successful boots still reports.
    mean_time = median_time = -1

  samples.append(
      sample.Sample(
          'Cluster Max Boot Time', slowest_time, 'nanoseconds', common_metadata
      )
  )
  samples.append(
      sample.Sample(
          'Cluster Max Boot Sec',
          slowest_time / _NANO,
          'seconds',
          common_metadata,
      )
  )
  samples.append(
      sample.Sample(
          'Cluster Mean Boot Sec', mean_time / _NANO, 'seconds', common_metadata
      )
  )
  samples.append(
      sample.Sample(
          'Cluster Median Boot Sec',
          median_time / _NANO,
          'seconds',
          common_metadata,
      )
  )
  samples.append(
      sample.Sample(
          'Cluster Expected Boots', _GetExpectedBoots(), '', common_metadata
      )
  )
  samples.append(
      sample.Sample('Cluster Success Boots', vm_count, '', common_metadata)
  )
  samples.append(
      sample.Sample(
          'Cluster Max Time to Running',
          time_to_running,
          'nanoseconds',
          common_metadata,
      )
  )
  return samples


def Run(benchmark_spec):
  """Runs the boot script on every launcher and collects the boot timings.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    A list of benchmark samples.
  """
  launcher_vms = benchmark_spec.vm_groups['servers']
  boot_command = f'bash {_BOOT_PATH} 2>&1 | tee log'

  def _KickOffBoots(vm):
    return vm.RemoteCommand(boot_command)

  background_tasks.RunThreaded(_KickOffBoots, launcher_vms)
  try:
    _WaitForResponses(launcher_vms)
  except InsufficientBootsError:
    # On really large-scale boots, some failures are expected; report
    # whatever was measured instead of failing the run.
    logging.info('Some VMs failed to boot.')
  return _ParseResult(launcher_vms)


def Cleanup(benchmark_spec):
  """Runs the clean up script on every launcher.

  Launcher VMs and booter template VM are deleted by pkb resource management.
  Boot VMs are self-destructing, but we will make a second attempt at destroying
  them anyway for good hygiene.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.
  """
  clean_up_command = f'bash {_CLEAN_UP_SCRIPT_PATH} 2>&1 | tee clean_up_log'

  def _RunCleanUp(vm):
    return vm.RemoteCommand(clean_up_command)

  background_tasks.RunThreaded(_RunCleanUp, benchmark_spec.vm_groups['servers'])
