perfkitbenchmarker/linux_benchmarks/unmanaged_mysql_sysbench_benchmark.py (165 lines of code) (raw):
# Copyright 2024 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Sysbench Benchmark for unmanaged MySQL db on a VM.
This benchmark measures performance of Sysbench Databases on unmanaged MySQL.
"""
import copy
import logging
import time
from absl import flags
from perfkitbenchmarker import background_tasks
from perfkitbenchmarker import benchmark_spec as bm_spec
from perfkitbenchmarker import configs
from perfkitbenchmarker import errors
from perfkitbenchmarker import sample
from perfkitbenchmarker.linux_packages import mysql80
from perfkitbenchmarker.linux_packages import sysbench
FLAGS = flags.FLAGS
BENCHMARK_NAME = 'unmanaged_mysql_sysbench'
BENCHMARK_CONFIG = """
unmanaged_mysql_sysbench:
description: Mysql on a VM benchmarked using Sysbench.
vm_groups:
server:
vm_spec:
GCP:
machine_type: c3-highmem-22
zone: us-east1-b
AWS:
machine_type: r7i.4xlarge
zone: us-east-1a
Azure:
machine_type: Standard_E20s_v5
zone: eastus
disk_spec:
GCP:
disk_size: 500
disk_type: hyperdisk-balanced
provisioned_iops: 160000
provisioned_throughput: 2400
num_striped_disks: 1
AWS:
disk_size: 500
disk_type: gp3
provisioned_iops: 16000
provisioned_throughput: 1000
num_striped_disks: 5
Azure:
disk_size: 200
disk_type: Premium_LRS_V2
provisioned_iops: 40000
provisioned_throughput: 800
num_striped_disks: 2
client:
vm_spec:
GCP:
machine_type: c3-standard-22
zone: us-east1-b
AWS:
machine_type: m7i.4xlarge
zone: us-east-1a
Azure:
machine_type: Standard_D16s_v5
zone: eastus
flags:
sysbench_version: df89d34c410a2277e19f77e47e535d0890b2029b
disk_fs_type: xfs
# for now we only have mysql supported
db_engine: mysql
sysbench_report_interval: 1
sysbench_ssl_mode: required
sysbench_run_threads: 1,64,128,256,512,1024,2048
sysbench_run_seconds: 300
"""
# There are 2 main customer scenarios:
# 1: 100G data set and all of that fits in memory,
# therefore only logging accesses disks.
# 2: 100G data and only 8G fits in memory,
# so both data access and logging access disks.
# Percona claims this is consistent with the alternative approach of
# increasing buffer pool and dataset to larger sizes
DEFAULT_BUFFER_POOL_SIZE = 8
# The database name is used to create a database on the server.
_DATABASE_TYPE = 'mysql'
_DATABASE_NAME = 'sysbench'
# test names
_TPCC = 'percona_tpcc'
_OLTP_READ_WRITE = 'oltp_read_write'
_OLTP_READ_ONLY = 'oltp_read_only'
_OLTP_WRITE_ONLY = 'oltp_write_only'
_OLTP = [_OLTP_READ_WRITE, _OLTP_READ_ONLY, _OLTP_WRITE_ONLY]
def GetConfig(user_config):
  """Load the benchmark config and apply user overrides.

  Args:
    user_config: Dict of user-supplied config overrides.

  Returns:
    The merged benchmark config dict.
  """
  config = configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
  # Rather than changing the database's default data dir in every per-cloud
  # config, mount the scratch disk at MySQL's default directory (simpler code).
  for cloud_disk_spec in config['vm_groups']['server']['disk_spec'].values():
    cloud_disk_spec['mount_point'] = '/var/lib/mysql'
  # Apply machine-type overrides for the server and client groups.
  for group, machine_type in (
      ('server', FLAGS.db_machine_type),
      ('client', FLAGS.client_vm_machine_type),
  ):
    if machine_type:
      for cloud_vm_spec in config['vm_groups'][group]['vm_spec'].values():
        cloud_vm_spec['machine_type'] = machine_type
  # Add one replica server group per replica zone when HA is requested.
  if FLAGS.db_high_availability:
    for index, zone in enumerate(FLAGS.db_replica_zones):
      replica = copy.deepcopy(config['vm_groups']['server'])
      for cloud_vm_spec in replica['vm_spec'].values():
        cloud_vm_spec['zone'] = zone
      config['vm_groups'][f'replica_{index}'] = replica
  return config
def _GetPassword():
  """Derive the per-session database password from the run URI."""
  return f'{FLAGS.run_uri}_P3rfk1tbenchm4rker#'
def GetSysbenchParameters(primary_server_ip: str | None, password: str):
  """Build sysbench input parameters from benchmark flags.

  Args:
    primary_server_ip: Internal IP of the primary database server, if any.
    password: Database password to pass to sysbench.

  Returns:
    A populated sysbench.SysbenchInputParameters.

  Raises:
    errors.Setup.InvalidConfigurationError: If --sysbench_testname names an
      unsupported test.
  """
  params = sysbench.SysbenchInputParameters(
      db_driver=_DATABASE_TYPE,
      tables=FLAGS.sysbench_tables,
      threads=FLAGS.sysbench_load_threads,
      report_interval=FLAGS.sysbench_report_interval,
      db_user=_DATABASE_NAME,
      db_password=password,
      db_name=_DATABASE_NAME,
      host_ip=primary_server_ip,
      ssl_setting=sysbench.SYSBENCH_SSL_MODE.value,
  )
  test_name = FLAGS.sysbench_testname
  if test_name == _TPCC:
    # TPCC comes from Percona's lua scripts rather than sysbench's built-ins.
    params.custom_lua_packages_path = '/opt/sysbench-tpcc/?.lua'
    params.built_in_test = False
    params.test = '/opt/sysbench-tpcc/tpcc.lua'
    params.scale = FLAGS.sysbench_scale
    params.use_fk = FLAGS.sysbench_use_fk
    params.trx_level = FLAGS.sysbench_txn_isolation_level
  elif test_name in _OLTP:
    params.built_in_test = True
    params.test = f'{sysbench.LUA_SCRIPT_PATH}{test_name}.lua'
    params.db_ps_mode = 'disable'
    params.skip_trx = True
    params.table_size = FLAGS.sysbench_table_size
    # oltp tests on mysql require ignoring errors.
    # https://github.com/akopytov/sysbench/issues/253
    params.mysql_ignore_errors = 'all'
  else:
    raise errors.Setup.InvalidConfigurationError(
        f'Test --sysbench_testname={FLAGS.sysbench_testname} is not supported.'
    )
  return params
def GetBufferPoolSize():
  """Return the innodb buffer pool size string (e.g. '8G') from flags."""
  size_gb = FLAGS.innodb_buffer_pool_size or DEFAULT_BUFFER_POOL_SIZE
  return f'{size_gb}G'
def Prepare(benchmark_spec: bm_spec.BenchmarkSpec):
  """Prepare the servers and clients for the benchmark run.

  Installs and configures MySQL on the primary and any replica servers,
  sets up replication, installs sysbench (and the TPCC lua scripts when
  needed) on the clients, and loads the dataset from the first client VM.

  Args:
    benchmark_spec: The benchmark specification.
  """
  vms = benchmark_spec.vms
  background_tasks.RunThreaded(mysql80.ConfigureSystemSettings, vms)
  background_tasks.RunThreaded(lambda vm: vm.Install('mysql80'), vms)
  buffer_pool_size = GetBufferPoolSize()
  primary_server = benchmark_spec.vm_groups['server'][0]
  replica_servers = []
  for group_name, group_vms in benchmark_spec.vm_groups.items():
    if group_name.startswith('replica'):
      replica_servers += group_vms
  servers = [primary_server] + replica_servers
  # Use the shared helper so the password set here always matches the one
  # Run() uses when building sysbench commands (was previously duplicated
  # inline, risking drift).
  new_password = _GetPassword()
  for index, server in enumerate(servers):
    # mysql server ids need to be positive integers.
    mysql80.ConfigureAndRestart(server, buffer_pool_size, index + 1)
    mysql80.UpdatePassword(server, new_password)
    mysql80.CreateDatabase(server, new_password, _DATABASE_NAME)
  assert primary_server.internal_ip
  for replica in replica_servers:
    mysql80.SetupReplica(replica, new_password, primary_server.internal_ip)
  clients = benchmark_spec.vm_groups['client']
  for client in clients:
    client.InstallPackages('git')
    client.Install('sysbench')
    if FLAGS.sysbench_testname == _TPCC:
      # TPCC needs Percona's lua scripts; clone a fresh copy each run.
      client.RemoteCommand(
          'cd /opt && sudo rm -fr sysbench-tpcc && '
          f'sudo git clone {sysbench.SYSBENCH_TPCC_REPRO}'
      )
  # Load the dataset from a single client VM.
  loader_vm = clients[0]
  sysbench_parameters = GetSysbenchParameters(
      primary_server.internal_ip, new_password)
  cmd = sysbench.BuildLoadCommand(sysbench_parameters)
  logging.info('%s load command: %s', FLAGS.sysbench_testname, cmd)
  loader_vm.RemoteCommand(cmd)
def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]:
  """Run the sysbench benchmark and publish results.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    A list of samples: time series, latency, and transactions metrics per
    thread count, plus a 'max_' sample per transactions metric across all
    thread counts.
  """
  primary_server = benchmark_spec.vm_groups['server'][0]
  client = benchmark_spec.vm_groups['client'][0]
  params = GetSysbenchParameters(primary_server.internal_ip, _GetPassword())
  results = []
  # Maps each transactions metric name to the sample holding its max value
  # seen so far across thread counts.
  best_by_metric = {}
  thread_counts = FLAGS.sysbench_run_threads
  for run_index, thread_count in enumerate(thread_counts):
    params.threads = thread_count
    cmd = sysbench.BuildRunCommand(params)
    logging.info('%s run command: %s', FLAGS.sysbench_testname, cmd)
    try:
      stdout, _ = client.RemoteCommand(
          cmd, timeout=2 * FLAGS.sysbench_run_seconds,)
    except errors.VirtualMachine.RemoteCommandError as e:
      # A failed thread count shouldn't abort the remaining runs.
      logging.exception('Failed to run sysbench command: %s', e)
      continue
    metadata = sysbench.GetMetadata(params)
    metadata.update({
        'buffer_pool_size': GetBufferPoolSize(),
    })
    results.extend(sysbench.ParseSysbenchTimeSeries(stdout, metadata))
    results.extend(sysbench.ParseSysbenchLatency([stdout], metadata))
    transactions = sysbench.ParseSysbenchTransactions(stdout, metadata)
    results.extend(transactions)
    for txn_sample in transactions:
      best = best_by_metric.get(txn_sample.metric)
      if best is None or best.value < txn_sample.value:
        best_by_metric[txn_sample.metric] = txn_sample
    # Sleep between runs if configured, skipping the final run.
    sleep_sec = sysbench.SYSBENCH_SLEEP_BETWEEN_RUNS_SEC.value
    if sleep_sec > 0 and run_index < len(thread_counts) - 1:
      logging.info('Sleeping for %d seconds before the next run.', sleep_sec)
      time.sleep(sleep_sec)
  # Report the max tps/qps across all thread counts as new metrics.
  for best in best_by_metric.values():
    metadata = copy.deepcopy(best.metadata)
    metadata['searched_thread_counts'] = thread_counts
    results.append(
        sample.Sample(
            'max_' + best.metric, best.value, best.unit, metadata=metadata
        )
    )
  return results
def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec):
  """No-op: the framework tears down all provisioned VMs itself."""
  del benchmark_spec  # Unused.