perfkitbenchmarker/providers/gcp/gcp_relational_db.py (401 lines of code) (raw):

# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Managed relational database provisioning for GCP.

As of June 2017 to make this benchmark run for GCP you must install the
gcloud beta component. This is necessary because creating a Cloud SQL instance
with a non-default storage size is in beta right now. This can be removed when
this feature is part of the default components.
See https://cloud.google.com/sdk/gcloud/reference/beta/sql/instances/create
for more information.
"""

import datetime
import json
import logging
import time

from absl import flags
from perfkitbenchmarker import data
from perfkitbenchmarker import mysql_iaas_relational_db
from perfkitbenchmarker import omni_postgres_iaas_relational_db
from perfkitbenchmarker import postgres_iaas_relational_db
from perfkitbenchmarker import provider_info
from perfkitbenchmarker import relational_db
from perfkitbenchmarker import sql_engine_utils
from perfkitbenchmarker import sqlserver_iaas_relational_db
from perfkitbenchmarker import timescaledb_iaas_relational_db
from perfkitbenchmarker import vm_util
from perfkitbenchmarker.providers.gcp import gce_network
from perfkitbenchmarker.providers.gcp import util

FLAGS = flags.FLAGS

# Maps (engine, user-facing version) to the value passed to gcloud's
# `--database-version` flag when creating a Cloud SQL instance.
GCP_DATABASE_VERSION_MAPPING = {
    sql_engine_utils.MYSQL: {
        '5.5': 'MYSQL_5_5',
        '5.6': 'MYSQL_5_6',
        '5.7': 'MYSQL_5_7',
        '8.0': 'MYSQL_8_0',
        '8.0.31': 'MYSQL_8_0_31',
    },
    sql_engine_utils.POSTGRES: {
        '9.6': 'POSTGRES_9_6',
        '10': 'POSTGRES_10',
        '11': 'POSTGRES_11',
        '12': 'POSTGRES_12',
        '13': 'POSTGRES_13',
        '14': 'POSTGRES_14',
        '15': 'POSTGRES_15',
        '16': 'POSTGRES_16',
        '17': 'POSTGRES_17',
    },
    # NOTE(review): the two *_Standard values are mixed-case while every other
    # edition is upper-case -- confirm Cloud SQL accepts this spelling.
    sql_engine_utils.SQLSERVER: {
        '2017_Standard': 'SQLSERVER_2017_Standard',
        '2017_Enterprise': 'SQLSERVER_2017_ENTERPRISE',
        '2017_Express': 'SQLSERVER_2017_EXPRESS',
        '2017_Web': 'SQLSERVER_2017_WEB',
        '2019_Standard': 'SQLSERVER_2019_Standard',
        '2019_Enterprise': 'SQLSERVER_2019_ENTERPRISE',
        '2019_Express': 'SQLSERVER_2019_EXPRESS',
        '2019_Web': 'SQLSERVER_2019_WEB',
    },
}

# Versions returned by GetDefaultEngineVersion, keyed by engine below.
DEFAULT_MYSQL_VERSION = '5.7'
DEFAULT_POSTGRES_VERSION = '9.6'
DEFAULT_SQL_SERVER_VERSION = '2017_Standard'

DEFAULT_ENGINE_VERSIONS = {
    sql_engine_utils.MYSQL: DEFAULT_MYSQL_VERSION,
    sql_engine_utils.POSTGRES: DEFAULT_POSTGRES_VERSION,
    sql_engine_utils.SQLSERVER: DEFAULT_SQL_SERVER_VERSION,
}

# TODO(chunla): Move to engine specific module
DEFAULT_USERNAME = {
    sql_engine_utils.MYSQL: 'root',
    sql_engine_utils.POSTGRES: 'postgres',
    sql_engine_utils.SQLSERVER: 'sqlserver',
}

# PostgreSQL restrictions on memory.
# Source: https://cloud.google.com/sql/docs/postgres/instance-settings.
CUSTOM_MACHINE_CPU_MEM_RATIO_LOWER_BOUND = 0.9
CUSTOM_MACHINE_CPU_MEM_RATIO_UPPER_BOUND = 6.5
MIN_CUSTOM_MACHINE_MEM_MB = 3840

IS_READY_TIMEOUT = 600  # 10 minutes
DELETE_INSTANCE_TIMEOUT = 600  # 10 minutes
CREATION_TIMEOUT = 1200  # 20 minutes


class UnsupportedDatabaseEngineError(Exception):
  """Exception type for unsupported database engines."""


class GCPSQLServerIAASRelationalDb(
    sqlserver_iaas_relational_db.SQLServerIAASRelationalDb
):
  """A GCP IAAS database resource."""

  CLOUD = provider_info.GCP

  def __init__(self, relational_db_spec):
    super().__init__(relational_db_spec)
    # Set by CreateIpReservation; None while no reservation is held.
    self._reserved_ip_address = None

  def CreateIpReservation(self) -> str:
    """Creates an IP address resource and returns the reserved address.

    The resource is named 'fci-ip-<run_uri>' and is built from the server VM's
    project, the region of its zone, and its network's primary subnet name.

    Returns:
      The `ip_address` attribute of the created resource.
    """
    ip_address_name = 'fci-ip-{}'.format(FLAGS.run_uri)
    self._reserved_ip_address = gce_network.GceIPAddress(
        self.server_vm.project,
        util.GetRegionFromZone(self.server_vm.zone),
        ip_address_name,
        self.server_vm.network.primary_subnet_name,
    )
    self._reserved_ip_address.Create()
    return self._reserved_ip_address.ip_address

  def ReleaseIpReservation(self) -> bool:
    """Deletes the reserved IP address, if one is held.

    Returns:
      True once no reservation is held by this object.
    """
    if self._reserved_ip_address:
      self._reserved_ip_address.Delete()
      # Forget the resource so the result reflects the release and _Delete
      # does not try to delete the same address a second time.
      self._reserved_ip_address = None
    return True

  def _Delete(self):
    """Deletes the database resource and any IP reservation still held."""
    super()._Delete()
    if self._reserved_ip_address:
      self._reserved_ip_address.Delete()


class GCPPostgresIAASRelationalDb(
    postgres_iaas_relational_db.PostgresIAASRelationalDb
):
  """A GCP IAAS database resource."""

  CLOUD = provider_info.GCP


class GCPMysqlIAASRelationalDb(mysql_iaas_relational_db.MysqlIAASRelationalDb):
  """A GCP IAAS database resource."""

  CLOUD = provider_info.GCP


class GCPMariaDbIAASRelationalDb(
    mysql_iaas_relational_db.MariaDbIAASRelationalDB
):
  """A GCP IAAS database resource."""

  CLOUD = provider_info.GCP


class GCPOmniPostgresIAASRelationalDb(
    omni_postgres_iaas_relational_db.OmniPostgresIAASRelationalDb
):
  """A GCP Omni Postgres IAAS database resource."""

  CLOUD = provider_info.GCP


class GCPTimescaleDbPostgresIAASRelationalDb(
    timescaledb_iaas_relational_db.TimescaleDbIAASRelationalDb
):
  """A TimescaleDB Postgres IAAS database resource."""

  CLOUD = provider_info.GCP


class GCPRelationalDb(relational_db.BaseRelationalDb):
  """A GCP CloudSQL database resource.

  This class contains logic required to provision and teardown the database.
  The instance is created with `--no-assign-ip` and attached to the client
  VM's network; a password is still required to connect. Supported engines
  and versions are the ones listed in GCP_DATABASE_VERSION_MAPPING.
  """

  CLOUD = provider_info.GCP
  IS_MANAGED = True

  def __init__(self, relational_db_spec):
    super().__init__(relational_db_spec)
    self.project = FLAGS.project or util.GetDefaultProject()

  def _CreateDependencies(self):
    """Sets up private services access on the client VM's network."""
    util.SetupPrivateServicesAccess(
        self.client_vm.network.network_resource.name, self.project
    )

  def _CreateGcloudSqlInstance(self):
    """Builds and issues the `gcloud sql instances create` command.

    Raises:
      RuntimeError: if neither cpus/memory nor a machine type is specified.
      ValueError: if the custom cpus/memory combination is invalid.
    """
    storage_size = self.spec.db_disk_spec.disk_size
    instance_zone = self.spec.db_spec.zone

    database_version_string = self._GetEngineVersionString(
        self.spec.engine, self.spec.engine_version
    )

    cmd_string = [
        self,
        'sql',
        'instances',
        'create',
        self.instance_id,
        '--quiet',
        '--format=json',
        '--activation-policy=ALWAYS',
        '--no-assign-ip',
        '--network=%s' % self.client_vm.network.network_resource.name,
        '--allocated-ip-range-name=google-service-range',
        '--zone=%s' % instance_zone,
        '--database-version=%s' % database_version_string,
        '--storage-size=%d' % storage_size,
        '--labels=%s' % util.MakeFormattedDefaultTags(),
    ]
    if self.spec.engine == sql_engine_utils.SQLSERVER:
      # `--root-password` is required when creating SQL Server instances.
      cmd_string.append(
          '--root-password={}'.format(self.spec.database_password)
      )

    # Custom cpu/memory sizing takes precedence over a named machine tier.
    if self.spec.db_spec.cpus and self.spec.db_spec.memory:
      self._ValidateSpec()
      memory = self.spec.db_spec.memory
      cpus = self.spec.db_spec.cpus
      self._ValidateMachineType(memory, cpus)
      cmd_string.append('--cpu={}'.format(cpus))
      cmd_string.append('--memory={}MiB'.format(memory))
    elif hasattr(self.spec.db_spec, 'machine_type'):
      machine_type_flag = '--tier=%s' % self.spec.db_spec.machine_type
      cmd_string.append(machine_type_flag)
    else:
      raise RuntimeError('Unspecified machine type')

    if self.spec.high_availability:
      cmd_string.append(self._GetHighAvailabilityFlag())

    if self.spec.backup_enabled:
      cmd_string.append('--backup')
      cmd_string.append('--retained-backups-count=2')
      cmd_string.append('--retained-transaction-log-days=1')
      if self.spec.engine == sql_engine_utils.MYSQL:
        cmd_string.append('--enable-bin-log')
      else:
        cmd_string.append('--enable-point-in-time-recovery')
    else:
      cmd_string.append('--no-backup')

    cmd = util.GcloudCommand(*cmd_string)
    cmd.flags['project'] = self.project
    cmd.use_beta_gcloud = True

    if self.spec.db_tier:
      # Setting an edition switches the command from beta to alpha gcloud.
      cmd.flags['edition'] = self.spec.db_tier
      cmd.use_alpha_gcloud = True
      cmd.use_beta_gcloud = False
      # NOTE(review): the data-cache flags are reconstructed as nested under
      # the edition branch; the flattened source does not show the original
      # indentation -- confirm.
      if relational_db.ENABLE_DATA_CACHE.value:
        cmd.flags['enable-data-cache'] = True
      else:
        cmd.flags['no-enable-data-cache'] = True

    _, stderr, retcode = cmd.Issue(timeout=CREATION_TIMEOUT)
    util.CheckGcloudResponseKnownFailures(stderr, retcode)

  def _Create(self):
    """Creates the Cloud SQL instance."""
    self._CreateGcloudSqlInstance()

  def _GetHighAvailabilityFlag(self):
    """Returns a flag that enables high-availability.

    Returns:
      Flag (as string) to be appended to the gcloud sql create command.
    """
    return '--availability-type=REGIONAL'

  def _ValidateSpec(self):
    """Validates that the spec provides both CPU count and memory.

    Raises:
      data.ResourceNotFound: On missing memory or cpus in the benchmark
        config.
    """
    if not hasattr(self.spec.db_spec, 'cpus') or not self.spec.db_spec.cpus:
      raise data.ResourceNotFound(
          'Must specify cpu count in benchmark config. See https://'
          'cloud.google.com/sql/docs/postgres/instance-settings for more '
          'details about size restrictions.'
      )
    if not hasattr(self.spec.db_spec, 'memory') or not self.spec.db_spec.memory:
      raise data.ResourceNotFound(
          'Must specify a memory amount in benchmark config. See https://'
          'cloud.google.com/sql/docs/postgres/instance-settings for more '
          'details about size restrictions.'
      )

  def _ValidateMachineType(self, memory, cpus):
    """Validates the custom machine type configuration.

    Memory and CPU must be within the parameters described here:
    https://cloud.google.com/sql/docs/postgres/instance-settings

    Args:
      memory: (int) in MiB
      cpus: (int)

    Raises:
      ValueError on invalid configuration.
    """
    cpus_ok = cpus == 1 or (2 <= cpus <= 96 and cpus % 2 == 0)
    if not cpus_ok:
      raise ValueError(
          'CPUs (%i) must be 1 or an even number in-between 2 and 96, '
          'inclusive.' % cpus
      )

    if memory % 256 != 0:
      raise ValueError(
          'Total memory (%dMiB) for a custom machine must be a multiple '
          'of 256MiB.' % memory
      )

    # GiB of memory per vCPU.
    ratio = memory / 1024.0 / cpus
    if not (
        CUSTOM_MACHINE_CPU_MEM_RATIO_LOWER_BOUND
        <= ratio
        <= CUSTOM_MACHINE_CPU_MEM_RATIO_UPPER_BOUND
    ):
      raise ValueError(
          'The memory (%.2fGiB) per vCPU (%d) of a custom machine '
          'type must be between %.2f GiB and %.2f GiB per vCPU, '
          'inclusive.'
          % (
              memory / 1024.0,
              cpus,
              CUSTOM_MACHINE_CPU_MEM_RATIO_LOWER_BOUND,
              CUSTOM_MACHINE_CPU_MEM_RATIO_UPPER_BOUND,
          )
      )

    if memory < MIN_CUSTOM_MACHINE_MEM_MB:
      raise ValueError(
          'The total memory (%dMiB) for a custom machine type '
          'must be at least %dMiB.' % (memory, MIN_CUSTOM_MACHINE_MEM_MB)
      )

  def _Delete(self):
    """Deletes the underlying resource.

    Implementations of this method should be idempotent since it may
    be called multiple times, even if the resource has already been
    deleted.
    """
    # Failures are tolerated (raise_on_failure=False) on both deletes.
    if hasattr(self, 'replica_instance_id'):
      cmd = util.GcloudCommand(
          self,
          'sql',
          'instances',
          'delete',
          self.replica_instance_id,
          '--quiet',
      )
      cmd.Issue(raise_on_failure=False, timeout=DELETE_INSTANCE_TIMEOUT)

    cmd = util.GcloudCommand(
        self,
        'sql',
        'instances',
        'delete',
        self.instance_id,
        '--quiet',
        '--async',
    )
    cmd.Issue(raise_on_failure=False, timeout=DELETE_INSTANCE_TIMEOUT)

  def _Exists(self):
    """Returns true if the underlying resource exists.

    Supplying this method is optional. If it is not implemented then the
    default is to assume success when _Create and _Delete do not raise
    exceptions.
    """
    cmd = util.GcloudCommand(
        self, 'sql', 'instances', 'describe', self.instance_id
    )
    stdout, _, _ = cmd.Issue(raise_on_failure=False)
    try:
      return json.loads(stdout)['kind'] == 'sql#instance'
    except (ValueError, KeyError, TypeError):
      # Unparseable output or a missing 'kind' is treated as "does not exist".
      return False

  def _IsDBInstanceReady(self, instance_id, timeout=IS_READY_TIMEOUT):
    """Polls `describe` until the instance state is RUNNABLE.

    Args:
      instance_id: id of the Cloud SQL instance to poll.
      timeout: maximum number of seconds to keep polling.

    Returns:
      True once the state is RUNNABLE; False on timeout or when the describe
      output cannot be parsed.
    """
    cmd = util.GcloudCommand(self, 'sql', 'instances', 'describe', instance_id)
    start_time = datetime.datetime.now()

    while True:
      elapsed = (datetime.datetime.now() - start_time).total_seconds()
      if elapsed > timeout:
        # No exception is active here, so log a plain error message.
        logging.error('Timeout waiting for sql instance to be ready')
        return False

      stdout, _, _ = cmd.Issue(raise_on_failure=False)
      try:
        state = json.loads(stdout)['state']
      except (ValueError, KeyError, TypeError):
        logging.exception('Error attempting to read stdout. Creation failure.')
        return False

      logging.info('Instance %s state: %s', instance_id, state)
      if state == 'RUNNABLE':
        return True
      time.sleep(5)

  def _IsReady(self, timeout=IS_READY_TIMEOUT):
    """Return true if the underlying resource is ready.

    Supplying this method is optional.  Use it when a resource can exist
    without being ready.  If the subclass does not implement
    it then it just returns true.

    On success, self.endpoint is set from the instance's describe output.

    Args:
      timeout: how long to wait when checking if the DB is ready.

    Returns:
      True if the resource was ready in time, False if the wait timed out.
    """
    if not self._IsDBInstanceReady(self.instance_id, timeout):
      return False

    if self.spec.high_availability and hasattr(self, 'replica_instance_id'):
      if not self._IsDBInstanceReady(self.replica_instance_id, timeout):
        return False

    cmd = util.GcloudCommand(
        self, 'sql', 'instances', 'describe', self.instance_id
    )
    stdout, _, _ = cmd.Issue()
    json_output = json.loads(stdout)
    self.endpoint = self._ParseEndpoint(json_output)
    return True

  def _ParseEndpoint(self, describe_instance_json):
    """Returns the IP of the resource given the metadata as JSON.

    Args:
      describe_instance_json: JSON output.

    Returns:
      The first address listed under 'ipAddresses' (string), or '' when the
      input is None or the address cannot be read.
    """
    if describe_instance_json is None:
      return ''
    try:
      return describe_instance_json['ipAddresses'][0]['ipAddress']
    except (KeyError, IndexError, TypeError):
      logging.exception('Error attempting to read stdout. Creation failure.')
      return ''

  @vm_util.Retry(max_retries=4, poll_interval=2)
  def SetManagedDatabasePassword(self):
    """Creates the configured user and sets the default user's password."""
    # The hostname '%' means unrestricted access from any host.
    cmd = util.GcloudCommand(
        self,
        'sql',
        'users',
        'create',
        self.spec.database_username,
        '--host=%',
        '--instance={}'.format(self.instance_id),
        '--password={}'.format(self.spec.database_password),
    )
    cmd.Issue()

    # By default the empty password is a security violation.
    # Change the password to a non-default value.
    default_user = DEFAULT_USERNAME[self.spec.engine]

    cmd = util.GcloudCommand(
        self,
        'sql',
        'users',
        'set-password',
        default_user,
        '--host=%',
        '--instance={}'.format(self.instance_id),
        '--password={}'.format(self.spec.database_password),
    )
    cmd.Issue()

  def _PostCreate(self):
    """Creates the PKB user and sets the password."""
    super()._PostCreate()
    self.SetManagedDatabasePassword()

  def _ApplyDbFlags(self):
    """Patches the instance with FLAGS.db_flags.

    Raises:
      RuntimeError: if the patch writes to stderr without reporting 'Updated',
        or if the instance is not ready after the reboot.
    """
    cmd_string = [
        self,
        'sql',
        'instances',
        'patch',
        self.instance_id,
        '--database-flags=%s' % ','.join(FLAGS.db_flags),
    ]
    cmd = util.GcloudCommand(*cmd_string)
    _, stderr, _ = cmd.Issue()
    if stderr:
      # sql instance patch outputs information to stderr
      # Reference to GCP documentation
      # https://cloud.google.com/sdk/gcloud/reference/sql/instances/patch
      # Example output
      # Updated [https://sqladmin.googleapis.com/].
      if 'Updated' in stderr:
        return
      raise RuntimeError('Invalid flags: %s' % stderr)

    self._Reboot()

  def _Reboot(self):
    """Restarts the instance and waits for it to become ready.

    Raises:
      RuntimeError: if the instance does not become ready.
    """
    cmd_string = [self, 'sql', 'instances', 'restart', self.instance_id]
    cmd = util.GcloudCommand(*cmd_string)
    cmd.Issue()

    if not self._IsReady():
      raise RuntimeError('Instance could not be set to ready after reboot')

  def GetResourceMetadata(self):
    """Returns the base metadata, adding the data-cache flag when enabled."""
    metadata = super().GetResourceMetadata()
    if relational_db.ENABLE_DATA_CACHE.value:
      metadata['db_flags'] = metadata.get('db_flags', []) + [
          'enable-data-cache'
      ]
    return metadata

  @staticmethod
  def GetDefaultEngineVersion(engine):
    """Returns the default version of a given database engine.

    Args:
      engine (string): type of database (my_sql or postgres).

    Returns:
      (string): Default version for the given database engine.

    Raises:
      NotImplementedError: if no default is defined for the engine.
    """
    if engine not in DEFAULT_ENGINE_VERSIONS:
      raise NotImplementedError(
          'Default engine not specified for engine {}'.format(engine)
      )
    return DEFAULT_ENGINE_VERSIONS[engine]

  @staticmethod
  def _GetEngineVersionString(engine, version):
    """Returns CloudSQL-specific version string for given database engine.

    Args:
      engine: database engine
      version: engine version

    Returns:
      (string): CloudSQL-specific name for requested engine and version.

    Raises:
      NotImplementedError on invalid engine / version combination.
    """
    if engine not in GCP_DATABASE_VERSION_MAPPING:
      valid_databases = ', '.join(GCP_DATABASE_VERSION_MAPPING.keys())
      raise NotImplementedError(
          'Database {} is not supported,supported databases include {}'.format(
              engine, valid_databases
          )
      )

    version_mapping = GCP_DATABASE_VERSION_MAPPING[engine]
    if version not in version_mapping:
      valid_versions = ', '.join(version_mapping.keys())
      raise NotImplementedError(
          'Version {} is not supported,supported versions include {}'.format(
              version, valid_versions
          )
      )

    return version_mapping[version]

  def _FailoverHA(self):
    """Fail over from master to replica."""
    cmd_string = [
        self,
        'sql',
        'instances',
        'failover',
        self.instance_id,
    ]
    cmd = util.GcloudCommand(*cmd_string)
    cmd.flags['project'] = self.project
    # this command doesnt support the specifier: 'format'
    del cmd.flags['format']
    cmd.IssueRetryable()