perfkitbenchmarker/providers/aws/aws_disk.py (577 lines of code) (raw):

# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Module containing classes related to AWS disks. Disks can be created, deleted, attached to VMs, and detached from VMs. See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html to determine valid disk types. See http://aws.amazon.com/ebs/details/ for more information about AWS (EBS) disks. """ import dataclasses import json import logging import string import threading import time from perfkitbenchmarker import background_tasks from perfkitbenchmarker import disk from perfkitbenchmarker import errors from perfkitbenchmarker import provider_info from perfkitbenchmarker import vm_util from perfkitbenchmarker.configs import option_decoders from perfkitbenchmarker.providers.aws import util class AwsStateRetryableError(Exception): """Error for retrying when an AWS disk is in a transitional state.""" VOLUME_EXISTS_STATUSES = frozenset(['creating', 'available', 'in-use', 'error']) VOLUME_DELETED_STATUSES = frozenset(['deleting', 'deleted']) VOLUME_KNOWN_STATUSES = VOLUME_EXISTS_STATUSES | VOLUME_DELETED_STATUSES STANDARD = 'standard' GP2 = 'gp2' GP3 = 'gp3' IO1 = 'io1' IO2 = 'io2' ST1 = 'st1' SC1 = 'sc1' AWS_REMOTE_DISK_TYPES = [STANDARD, SC1, ST1, GP2, GP3, IO1, IO2] # any disk types here, consider adding them to AWS_REMOTE_DISK_TYPES as well. DISK_METADATA = { STANDARD: { disk.MEDIA: disk.HDD, disk.REPLICATION: disk.ZONE, }, GP2: { disk.MEDIA: disk.SSD, disk.REPLICATION: disk.ZONE, }, GP3: { disk.MEDIA: disk.SSD, disk.REPLICATION: disk.ZONE, }, IO1: { disk.MEDIA: disk.SSD, disk.REPLICATION: disk.ZONE, }, IO2: { disk.MEDIA: disk.SSD, disk.REPLICATION: disk.ZONE, }, ST1: {disk.MEDIA: disk.HDD, disk.REPLICATION: disk.ZONE}, SC1: {disk.MEDIA: disk.HDD, disk.REPLICATION: disk.ZONE}, } LOCAL_SSD_METADATA = { disk.MEDIA: disk.SSD, disk.REPLICATION: disk.NONE, } LOCAL_HDD_METADATA = { disk.MEDIA: disk.HDD, disk.REPLICATION: disk.NONE, } LOCAL_HDD_PREFIXES = ['d2', 'hs1', 'h1', 'c1', 'cc2', 'm1', 'm2'] # Following lists based on # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html NON_EBS_NVME_TYPES = [ 'c4', 'd2', 'f1', 'g3', 'h1', 'i3', 'm4', 'p2', 'p3', 'r4', 't2', 'x1', 'x1e', 'm1', 'm3', 'c1', 'cc2', 'c3', 'm2', 'cr1', 'r3', 'hs1', 'i2', 'g2', 't1', ] NON_LOCAL_NVME_TYPES = LOCAL_HDD_PREFIXES + [ 'c3', 'cr1', 'g2', 'i2', 'm3', 'r3', 'x1', 'x1e', ] # Following dictionary based on # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html NUM_LOCAL_VOLUMES = { 'c1.medium': 1, 'c1.xlarge': 4, 'c3.large': 2, 'c3.xlarge': 2, 'c3.2xlarge': 2, 'c3.4xlarge': 2, 'c3.8xlarge': 2, 'cc2.8xlarge': 4, 'cg1.4xlarge': 2, 'cr1.8xlarge': 2, 'g2.2xlarge': 1, 'hi1.4xlarge': 2, 'hs1.8xlarge': 24, 'i2.xlarge': 1, 'i2.2xlarge': 2, 'i2.4xlarge': 4, 'i2.8xlarge': 8, 'm1.small': 1, 'm1.medium': 1, 'm1.large': 2, 'm1.xlarge': 4, 'm2.xlarge': 1, 'm2.2xlarge': 1, 'm2.4xlarge': 2, 'm3.medium': 1, 'm3.large': 1, 'm3.xlarge': 2, 'm3.2xlarge': 2, 'r3.large': 1, 'r3.xlarge': 1, 'r3.2xlarge': 1, 'r3.4xlarge': 1, 'r3.8xlarge': 2, 'd2.xlarge': 3, 'd2.2xlarge': 6, 'd2.4xlarge': 12, 'd2.8xlarge': 24, 'd3.xlarge': 3, 'd3.2xlarge': 6, 'd3.4xlarge': 12, 'd3.8xlarge': 24, 'd3en.large': 1, 'd3en.xlarge': 2, 'd3en.2xlarge': 4, 'd3en.4xlarge': 8, 'd3en.6xlarge': 12, 'd3en.8xlarge': 16, 'd3en.12xlarge': 24, 'i3.large': 1, 'i3.xlarge': 1, 'i3.2xlarge': 1, 'i3.4xlarge': 2, 'i3.8xlarge': 4, 'i3.16xlarge': 8, 'i3.metal': 8, 'i4i.large': 1, 'i4i.xlarge': 1, 'i4i.2xlarge': 1, 'i4i.4xlarge': 1, 'i4i.8xlarge': 2, 'i4i.16xlarge': 4, 'i4i.32xlarge': 8, 'is4gen.medium': 1, 'is4gen.large': 1, 'is4gen.xlarge': 1, 'is4gen.2xlarge': 1, 'is4gen.4xlarge': 2, 'is4gen.8xlarge': 4, 'im4gn.large': 1, 'im4gn.xlarge': 1, 'im4gn.2xlarge': 1, 'im4gn.4xlarge': 1, 'im4gn.8xlarge': 2, 'im4gn.16xlarge': 4, 'i3en.large': 1, 'i3en.xlarge': 1, 'i3en.2xlarge': 2, 'i3en.3xlarge': 1, 'i3en.6xlarge': 2, 'i3en.12xlarge': 4, 'i3en.24xlarge': 8, 'i3en.metal': 8, 'c5ad.large': 1, 'c5ad.xlarge': 1, 'c5ad.2xlarge': 1, 'c5ad.4xlarge': 2, 'c5ad.8xlarge': 2, 'c5ad.12xlarge': 2, 'c5ad.16xlarge': 2, 'c5ad.24xlarge': 2, 'c5d.large': 1, 'c5d.xlarge': 1, 'c5d.2xlarge': 1, 'c5d.4xlarge': 1, 'c5d.9xlarge': 1, 'c5d.18xlarge': 2, 'c5d.24xlarge': 4, 'c5d.metal': 4, 'c6gd.large': 1, 'c6gd.xlarge': 1, 'c6gd.2xlarge': 1, 'c6gd.4xlarge': 1, 'c6gd.8xlarge': 1, 'c6gd.12xlarge': 2, 'c6gd.16xlarge': 2, 'c6gd.metal': 2, 'm5d.large': 1, 'm5d.xlarge': 1, 'm5d.2xlarge': 1, 'm5d.4xlarge': 2, 'm5d.8xlarge': 2, 'm5d.12xlarge': 2, 'm5d.24xlarge': 4, 'm5d.metal': 4, 'm6gd.large': 1, 'm6gd.xlarge': 1, 'm6gd.2xlarge': 1, 'm6gd.4xlarge': 1, 'm6gd.8xlarge': 1, 'm6gd.12xlarge': 2, 'm6gd.16xlarge': 2, 'm6gd.metal': 2, 'm7gd.medium': 1, 'm7gd.large': 1, 'm7gd.xlarge': 1, 'm7gd.2xlarge': 1, 'm7gd.4xlarge': 1, 'm7gd.8xlarge': 1, 'm7gd.12xlarge': 2, 'm7gd.16xlarge': 2, 'm7gd.metal': 2, 'm6id.large': 1, 'm6id.xlarge': 1, 'm6id.2xlarge': 1, 'm6id.4xlarge': 1, 'm6id.8xlarge': 1, 'm6id.12xlarge': 2, 'm6id.16xlarge': 2, 'm6id.24xlarge': 4, 'm6id.32xlarge': 4, 'm6id.metal': 4, 'r5d.large': 1, 'r5d.xlarge': 1, 'r5d.2xlarge': 1, 'r5d.4xlarge': 2, 'r5d.8xlarge': 2, 'r5d.12xlarge': 2, 'r5d.16xlarge': 4, 'r5d.24xlarge': 4, 'r6id.large': 1, 'r6id.xlarge': 1, 'r6id.2xlarge': 1, 'r6id.4xlarge': 1, 'r6id.8xlarge': 1, 'r6id.12xlarge': 2, 'r6id.16xlarge': 2, 'r6id.24xlarge': 4, 'r6id.32xlarge': 4, 'r6gd.large': 1, 'r6gd.xlarge': 1, 'r6gd.2xlarge': 1, 'r6gd.4xlarge': 1, 'r6gd.8xlarge': 1, 'r6gd.12xlarge': 2, 'r6gd.16xlarge': 2, 'z1d.large': 1, 'z1d.xlarge': 1, 'z1d.2xlarge': 1, 'z1d.3xlarge': 2, 'z1d.6xlarge': 1, 'z1d.12xlarge': 2, 'x1.16xlarge': 1, 'x1.32xlarge': 2, 'x1e.xlarge': 1, 'x1e.2xlarge': 1, 'x1e.4xlarge': 1, 'x1e.8xlarge': 1, 'x1e.16xlarge': 1, 'x1e.32xlarge': 2, 'f1.2xlarge': 1, 'f1.4xlarge': 1, 'f1.16xlarge': 4, 'p3dn.24xlarge': 2, 'p4d.24xlarge': 8, 'g4dn.xlarge': 1, 'g4dn.2xlarge': 1, 'g4dn.4xlarge': 1, 'g4dn.8xlarge': 1, 'g4dn.12xlarge': 1, 'g4dn.16xlarge': 1, 'g4dn.metal': 2, 'g4ad.xlarge': 1, 'g4ad.2xlarge': 1, 'g4ad.4xlarge': 1, 'g4ad.8xlarge': 1, 'g4ad.16xlarge': 2, 'g5.xlarge': 1, 'g5.2xlarge': 1, 'g5.4xlarge': 1, 'g5.8xlarge': 1, 'g5.12xlarge': 1, 'g5.16xlarge': 1, 'g5.24xlarge': 1, 'g5.48xlarge': 2, 'g6.xlarge': 1, 'g6.2xlarge': 1, 'g6.4xlarge': 1, 'g6.8xlarge': 2, 'g6.16xlarge': 2, 'gr6.4xlarge': 1, 'gr6.8xlarge': 2, 'g6.12xlarge': 4, 'g6.24xlarge': 4, 'g6.48xlarge': 8, 'p5.48xlarge': 8, 'p5en.48xlarge': 8, 'i7ie.large': 1, 'i7ie.xlarge': 1, 'i7ie.2xlarge': 2, 'i7ie.3xlarge': 1, 'i7ie.6xlarge': 2, 'i7ie.12xlarge': 4, 'i7ie.18xlarge': 6, 'i7ie.24xlarge': 8, 'i7ie.48xlarge': 16, 'i8g.large': 1, 'i8g.xlarge': 1, 'i8g.2xlarge': 1, 'i8g.4xlarge': 1, 'i8g.8xlarge': 2, 'i8g.12xlarge': 3, 'i8g.16xlarge': 4, 'i8g.24xlarge': 6, 'i8g.metal-24xl': 6, } def LocalDiskIsHDD(machine_type): """Check whether the local disks use spinning magnetic storage.""" return machine_type.split('.')[0].lower() in LOCAL_HDD_PREFIXES def LocalDriveIsNvme(machine_type): """Check if the machine type uses NVMe driver.""" return machine_type.split('.')[0].lower() not in NON_LOCAL_NVME_TYPES def EbsDriveIsNvme(machine_type): """Check if the machine type uses NVMe driver.""" instance_family = machine_type.split('.')[0].lower() return instance_family not in NON_EBS_NVME_TYPES or 'metal' in machine_type AWS = 'AWS' class AwsDiskSpec(disk.BaseDiskSpec): """Object holding the information needed to create an AwsDisk.""" create_with_vm: bool CLOUD = provider_info.AWS @classmethod def _ApplyFlags(cls, config_values, flag_values): """Modifies config options based on runtime flag values. Can be overridden by derived classes to add support for specific flags. Args: config_values: dict mapping config option names to provided values. May be modified by this function. flag_values: flags.FlagValues. Runtime flags that may override the provided config values. """ super()._ApplyFlags(config_values, flag_values) if flag_values['aws_create_disks_with_vm'].present: config_values['create_with_vm'] = flag_values.aws_create_disks_with_vm @classmethod def _GetOptionDecoderConstructions(cls): """Gets decoder classes and constructor args for each configurable option. Returns: dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. The pair specifies a decoder class and its __init__() keyword arguments to construct in order to decode the named option. """ result = super()._GetOptionDecoderConstructions() result.update( { 'create_with_vm': ( option_decoders.BooleanDecoder, {'default': True}, ) } ) return result @dataclasses.dataclass class AWSDiskIdentifiers: """Identifiers of an AWS disk assigned by AWS at creation time.""" volume_id: str | None path: str | None class AwsDisk(disk.BaseDisk): """Object representing an Aws Disk.""" _lock = threading.Lock() # this is a mapping of vm_id to unused alphabetical device letters. available_device_letters_by_vm = {} def __init__(self, disk_spec, zone, machine_type, disk_spec_id=None): super().__init__(disk_spec) self.iops = disk_spec.provisioned_iops self.throughput = disk_spec.provisioned_throughput self.id = None self.zone = zone self.region = util.GetRegionFromZone(zone) self.device_letter = None self.attached_vm_id = None self.attached_vm_name = None self.machine_type = machine_type if self.disk_type != disk.LOCAL: self.metadata.update(DISK_METADATA.get(self.disk_type, {})) else: self.metadata.update( LOCAL_HDD_METADATA if LocalDiskIsHDD(machine_type) else LOCAL_SSD_METADATA ) if self.iops: self.metadata['iops'] = self.iops if self.throughput: self.metadata['throughput'] = self.throughput self.disk_spec_id = disk_spec_id def IsNvme(self): if self.disk_type == disk.LOCAL: return LocalDriveIsNvme(self.machine_type) elif self.disk_type in AWS_REMOTE_DISK_TYPES: return EbsDriveIsNvme(self.machine_type) else: return False def AssignDeviceLetter(self, letter_suggestion, nvme_boot_drive_index): if LocalDriveIsNvme(self.machine_type) and EbsDriveIsNvme( self.machine_type ): first_device_letter = 'b' local_drive_number = ord(letter_suggestion) - ord(first_device_letter) logging.info('local drive number is: %d', local_drive_number) if local_drive_number < nvme_boot_drive_index: self.device_letter = letter_suggestion else: # skip the boot drive self.device_letter = chr(ord(letter_suggestion) + 1) else: self.device_letter = letter_suggestion def _Create(self): """Creates the disk.""" create_cmd = util.AWS_PREFIX + [ 'ec2', 'create-volume', '--region=%s' % self.region, '--size=%s' % self.disk_size, '--volume-type=%s' % self.disk_type, ] if not util.IsRegion(self.zone): create_cmd.append('--availability-zone=%s' % self.zone) if self.disk_type in [IO1, IO2]: create_cmd.append('--iops=%s' % self.iops) if self.disk_type == GP3 and self.iops: create_cmd.append('--iops=%s' % self.iops) if self.disk_type == GP3 and self.throughput: create_cmd.append('--throughput=%s' % self.throughput) try: self.create_disk_start_time = time.time() stdout, _, _ = vm_util.IssueCommand(create_cmd) self.create_disk_end_time = time.time() except errors.VmUtil.IssueCommandError as error: error_message = str(error) is_quota_error = 'MaxIOPSLimitExceeded' in error_message if is_quota_error: raise errors.Benchmarks.QuotaFailure(error_message) raise error response = json.loads(stdout) self.id = response['VolumeId'] util.AddDefaultTags(self.id, self.region) def _Delete(self): """Deletes the disk.""" delete_cmd = util.AWS_PREFIX + [ 'ec2', 'delete-volume', '--region=%s' % self.region, '--volume-id=%s' % self.id, ] logging.info( 'Deleting AWS volume %s. This may fail if the disk is not ' 'yet detached, but will be retried.', self.id, ) vm_util.IssueCommand(delete_cmd, raise_on_failure=False) def _Exists(self): """Returns true if the disk exists.""" describe_cmd = util.AWS_PREFIX + [ 'ec2', 'describe-volumes', '--region=%s' % self.region, '--filter=Name=volume-id,Values=%s' % self.id, ] stdout, _ = util.IssueRetryableCommand(describe_cmd) response = json.loads(stdout) volumes = response['Volumes'] assert len(volumes) < 2, 'Too many volumes.' if not volumes: return False status = volumes[0]['State'] assert status in VOLUME_KNOWN_STATUSES, status return status in VOLUME_EXISTS_STATUSES @vm_util.Retry( poll_interval=0.5, log_errors=True, retryable_exceptions=(AwsStateRetryableError,), ) def _WaitForAttachedState(self): """Returns if the state of the disk is attached. Returns: Whether the disk is in an attached state. If not, raises an error. Raises: AwsUnknownStatusError: If an unknown status is returned from AWS. AwsStateRetryableError: If the disk attach is pending. This is retried. """ describe_cmd = util.AWS_PREFIX + [ 'ec2', 'describe-volumes', '--region=%s' % self.region, '--volume-ids=%s' % self.id, ] stdout, _ = util.IssueRetryableCommand(describe_cmd) response = json.loads(stdout) status = response['Volumes'][0]['Attachments'][0]['State'] if status.lower() != 'attached': logging.info( 'Disk (id:%s) attaching to VM (id:%s) has status %s.', self.id, self.attached_vm_id, status, ) raise AwsStateRetryableError() volume_id = response['Volumes'][0]['Attachments'][0]['VolumeId'] device_name = response['Volumes'][0]['Attachments'][0]['Device'] return volume_id, device_name @classmethod def GenerateDeviceNamePrefix(cls): """Generates the device name prefix.""" return '/dev/xvd' @classmethod def GenerateDeviceLetter(cls, vm_name): """Generates the next available device letter for a given VM.""" with cls._lock: if vm_name not in cls.available_device_letters_by_vm: # AWS allows the following device names: # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/device_naming.html#available-ec2-device-names ascii_characters = list(string.ascii_lowercase) available_letters = [] first_ch = ['a', 'b', 'c', 'd'] for ch in first_ch: available_letters.extend( (ch + ascii_character) for ascii_character in ascii_characters ) # local ssds cannot use 'a' to allow for boot disk naming. # remove 'a' as an available device letter, # so that both local ssds and remote disks can share this naming # convention. ascii_characters.remove('a') # According to the mentioned above, xvdb, xvdc, xvd are not allowed ascii_characters.remove('b') ascii_characters.remove('c') ascii_characters.remove('d') # Getting xvddy and xvddz are invalid names during runtime available_letters.remove('dy') available_letters.remove('dz') available_letters.extend(ascii_characters) cls.available_device_letters_by_vm[vm_name] = set(available_letters) device_letter = min(cls.available_device_letters_by_vm[vm_name]) cls.available_device_letters_by_vm[vm_name].remove(device_letter) return device_letter def _Attach(self, vm): """Attaches the disk to a VM. Args: vm: The AwsVirtualMachine instance to which the disk will be attached. """ self.device_letter = AwsDisk.GenerateDeviceLetter(vm.name) self.attached_vm_id = vm.id self.attached_vm_name = vm.name device_name = self.GenerateDeviceNamePrefix() + self.device_letter attach_cmd = util.AWS_PREFIX + [ 'ec2', 'attach-volume', '--region=%s' % self.region, '--instance-id=%s' % vm.id, '--volume-id=%s' % self.id, '--device=%s' % device_name, ] logging.info( 'Attaching AWS volume %s. This may fail if the disk is not ' 'ready, but will be retried.', self.id, ) self.attach_start_time = time.time() vm_util.IssueCommand(attach_cmd, raise_on_failure=False) self.attach_end_time = time.time() volume_id, device_name = self._WaitForAttachedState() vm.LogDeviceByName(device_name, volume_id, device_name) if self.disk_spec_id: vm.LogDeviceByDiskSpecId(self.disk_spec_id, device_name) def _Detach(self): """Detaches the disk from a VM.""" detach_cmd = util.AWS_PREFIX + [ 'ec2', 'detach-volume', '--region=%s' % self.region, '--instance-id=%s' % self.attached_vm_id, '--volume-id=%s' % self.id, ] vm_util.IssueCommand(detach_cmd, raise_on_failure=False) with self._lock: assert self.attached_vm_name in AwsDisk.available_device_letters_by_vm AwsDisk.available_device_letters_by_vm[self.attached_vm_name].add( self.device_letter ) self.attached_vm_id = None self.attached_vm_name = None self.device_letter = None class AwsStripedDisk(disk.StripedDisk): """Object representing multiple azure disks striped together.""" def _Create(self): create_tasks = [] for disk_details in self.disks: create_tasks.append((disk_details.Create, (), {})) background_tasks.RunParallelThreads(create_tasks, max_concurrency=200) def _Attach(self, vm): attach_tasks = [] for disk_details in self.disks: attach_tasks.append((disk_details.Attach, [vm], {})) background_tasks.RunParallelThreads(attach_tasks, max_concurrency=200) def _Detach(self): detach_tasks = [] for disk_details in self.disks: detach_tasks.append((disk_details.Detach, (), {})) background_tasks.RunParallelThreads(detach_tasks, max_concurrency=200)