perfkitbenchmarker/linux_benchmarks/mongodb_ycsb_benchmark.py (209 lines of code) (raw):
# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run YCSB against MongoDB.
YCSB is a load generator for many 'cloud' databases. MongoDB is a NoSQL
database.
MongoDB homepage: http://www.mongodb.org/
YCSB homepage: https://github.com/brianfrankcooper/YCSB/wiki
"""
from collections.abc import Sequence
import functools
import json
import posixpath
import re
import time
from typing import Any
from absl import flags
from perfkitbenchmarker import background_tasks
from perfkitbenchmarker import benchmark_spec as bm_spec
from perfkitbenchmarker import configs
from perfkitbenchmarker import errors
from perfkitbenchmarker import linux_virtual_machine
from perfkitbenchmarker import sample
from perfkitbenchmarker.linux_packages import mongosh
from perfkitbenchmarker.linux_packages import ycsb
flags.DEFINE_integer(
'mongodb_readahead_kb', None, 'Configure block device readahead settings.'
)
flags.DEFINE_bool(
'mongodb_primary_only', False, 'Run with a simple primary-only setup.'
)
flags.DEFINE_integer(
'mongodb_batchsize',
1,
'Client request batch size. Applies to inserts only (YCSB limitation).',
)
_MONGODB_LOG_LEVEL = flags.DEFINE_integer(
'mongodb_log_level',
None,
'MongoDB log level, verbosity increases with level',
1,
5,
)
FLAGS = flags.FLAGS
_VERSION_REGEX = r'\d+\.\d+\.\d+'
BENCHMARK_NAME = 'mongodb_ycsb'
BENCHMARK_CONFIG = """
mongodb_ycsb:
description: Run YCSB against MongoDB.
vm_groups:
primary:
vm_spec: *default_dual_core
disk_spec:
GCP:
disk_size: 500
disk_type: pd-balanced
mount_point: /scratch
AWS:
disk_size: 500
disk_type: gp3
mount_point: /scratch
Azure:
disk_size: 500
disk_type: Premium_LRS
mount_point: /scratch
vm_count: 1
secondary:
vm_spec: *default_dual_core
disk_spec:
GCP:
disk_size: 500
disk_type: pd-balanced
mount_point: /scratch
AWS:
disk_size: 500
disk_type: gp3
mount_point: /scratch
Azure:
disk_size: 500
disk_type: Premium_LRS
mount_point: /scratch
vm_count: 1
arbiter:
vm_spec: *default_dual_core
vm_count: 1
clients:
os_type: ubuntu2204 # Python 2
vm_spec: *default_dual_core
vm_count: 1
flags:
openjdk_version: 8
disk_fs_type: xfs
fstab_options: noatime
enable_transparent_hugepages: false
create_and_boot_post_task_delay: 5
"""
_LinuxVM = linux_virtual_machine.BaseLinuxVirtualMachine
def GetConfig(user_config: dict[str, Any]) -> dict[str, Any]:
"""Validates the user config dictionary."""
# Default config has 1 client, 1 primary, 1 secondary, and 1 arbiter VM.
config = configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
if FLAGS['ycsb_client_vms'].present:
config['vm_groups']['clients']['vm_count'] = FLAGS.ycsb_client_vms
primary_count = config['vm_groups']['primary']['vm_count']
if FLAGS.mongodb_primary_only:
if primary_count != 1:
raise errors.Config.InvalidValue(
'Must have exactly one primary VM when using --mongodb_primary_only.'
)
# Must have exactly zero secondary and arbiter VMs when using
# --mongodb_primary_only.
config['vm_groups']['secondary']['vm_count'] = 0
config['vm_groups']['arbiter']['vm_count'] = 0
else:
secondary_count = config['vm_groups']['secondary']['vm_count']
arbiter_count = config['vm_groups']['arbiter']['vm_count']
if any([
count != 1 for count in [primary_count, secondary_count, arbiter_count]
]):
raise errors.Config.InvalidValue(
'Must have exactly one primary, secondary, and arbiter VM.'
)
return config
def _GetDataDir(vm: _LinuxVM) -> str:
return posixpath.join(vm.GetScratchDir(), 'mongodb-data')
def _PrepareServer(vm: _LinuxVM) -> None:
"""Installs MongoDB on the server."""
vm.Install('mongodb_server')
vm.Install('mongosh')
data_dir = _GetDataDir(vm)
vm.RemoteCommand(f'sudo rm -rf {data_dir}')
vm.RemoteCommand(f'mkdir {data_dir} && chmod a+rwx {data_dir}')
vm.RemoteCommand(
f'sudo sed -i "s|dbPath:.*|dbPath: {data_dir}|"'
f' {vm.GetPathToConfig("mongodb_server")}'
)
if FLAGS.mongodb_readahead_kb is not None:
vm.SetReadAhead(
FLAGS.mongodb_readahead_kb * 2,
[d.GetDevicePath() for d in vm.scratch_disks],
)
# Settings taken from MongoDB operations checklist
vm.ApplySysctlPersistent({
'fs.file-max': 98000,
'kernel.pid_max': 64000,
'kernel.threads-max': 64000,
'vm.max_map_count': 102400,
})
# Too many connections fails if we don't set file descriptor limit higher.
vm.RemoteCommand('ulimit -n 64000 && sudo systemctl start mongod')
if _MONGODB_LOG_LEVEL.value is not None:
time.sleep(10)
mongosh.RunCommand(
vm,
f'db.setLogLevel({_MONGODB_LOG_LEVEL.value})',
)
def _PrepareArbiter(vm: _LinuxVM) -> None:
"""Installs MongoDB on the arbiter."""
vm.Install('mongodb_server')
vm.Install('mongosh')
vm.RemoteCommand('ulimit -n 64000 && sudo systemctl start mongod')
def _PrepareReplicaSet(
server_vms: Sequence[_LinuxVM], arbiter_vm: _LinuxVM
) -> None:
"""Prepares the replica set for the benchmark.
This benchmark currently uses a primary-secondary-arbiter replica set
configuration. The secondary keeps a full replica of the data while the
arbiter does not. The arbiter is still able to vote. See
https://www.mongodb.com/docs/manual/core/replica-set-architecture-three-members
for more information.
Args:
server_vms: The primary (index 0) and secondary (index 1) server VMs to use.
arbiter_vm: The arbiter VM to use.
"""
args = {
'_id': '"rs0"',
'members': [
{
'_id': 0,
'host': f'"{server_vms[0].internal_ip}:27017"',
'priority': 1,
},
{
'_id': 1,
'host': f'"{server_vms[1].internal_ip}:27017"',
'priority': 0.5,
},
{
'_id': 2,
'host': f'"{arbiter_vm.internal_ip}:27017"',
'arbiterOnly': True,
},
],
}
mongosh.RunCommand(server_vms[0], f'rs.initiate({json.dumps(args)})')
mongosh.RunCommand(server_vms[0], 'rs.conf()')
def _PrepareClient(vm: _LinuxVM) -> None:
"""Install YCSB on the client VM."""
vm.Install('ycsb')
vm.Install('mongosh')
# Disable logging for MongoDB driver, which is otherwise quite verbose.
log_config = """<configuration><root level="WARN"/></configuration>"""
vm.RemoteCommand(
"echo '{}' > {}/logback.xml".format(log_config, ycsb.YCSB_DIR)
)
def _GetMongoDbURL(benchmark_spec: bm_spec.BenchmarkSpec) -> str:
"""Returns the connection string used to connect to the instance."""
# all the connection strings here require committing to disk (journal)
# prior to client acknowledgement. See
# https://www.mongodb.com/docs/manual/reference/write-concern/#acknowledgment-behavior
primary = benchmark_spec.vm_groups['primary'][0]
if FLAGS.mongodb_primary_only:
return (
f'"mongodb://{primary.internal_ip}:27017/ycsb'
'?w=1&j=true&compression=snappy&maxPoolSize=60000"'
)
secondary = benchmark_spec.vm_groups['secondary'][0]
arbiter = benchmark_spec.vm_groups['arbiter'][0]
return (
f'"mongodb://{primary.internal_ip}:27017,'
f'{secondary.internal_ip}:27017,'
f'{arbiter.internal_ip}:27017/ycsb'
'?replicaSet=rs0&w=majority&compression=snappy"'
)
def Prepare(benchmark_spec: bm_spec.BenchmarkSpec) -> None:
"""Install MongoDB on one VM and YCSB on another.
Args:
benchmark_spec: The benchmark specification. Contains all data that is
required to run the benchmark.
"""
primary = benchmark_spec.vm_groups['primary'][0]
secondary = None
arbiter = None
clients = benchmark_spec.vm_groups['clients']
server_partials = [functools.partial(_PrepareServer, primary)]
arbiter_partial = []
client_partials = [
functools.partial(_PrepareClient, client) for client in clients
]
if not FLAGS.mongodb_primary_only:
secondary = benchmark_spec.vm_groups['secondary'][0]
arbiter = benchmark_spec.vm_groups['arbiter'][0]
server_partials += [functools.partial(_PrepareServer, secondary)]
arbiter_partial += [functools.partial(_PrepareArbiter, arbiter)]
background_tasks.RunThreaded(
(lambda f: f()), server_partials + arbiter_partial + client_partials
)
if not FLAGS.mongodb_primary_only:
_PrepareReplicaSet([primary, secondary], arbiter)
benchmark_spec.executor = ycsb.YCSBExecutor('mongodb', cp=ycsb.YCSB_DIR)
benchmark_spec.mongodb_url = _GetMongoDbURL(benchmark_spec)
benchmark_spec.mongodb_version = re.findall(
_VERSION_REGEX,
mongosh.RunCommand(primary, 'db.version()')[0],
)[-1]
load_kwargs = {
'mongodb.url': benchmark_spec.mongodb_url,
'mongodb.batchsize': 10,
'mongodb.upsert': True,
'core_workload_insertion_retry_limit': 10,
}
benchmark_spec.executor.Load(clients, load_kwargs=load_kwargs)
# Print some useful loading stats
mongosh.RunCommand(primary, 'db.stats()')
if not FLAGS.mongodb_primary_only:
mongosh.RunCommand(primary, 'rs.conf()')
primary.RemoteCommand('df -h')
def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]:
"""Run YCSB against MongoDB.
Args:
benchmark_spec: The benchmark specification. Contains all data that is
required to run the benchmark.
Returns:
A list of sample.Sample objects.
"""
run_kwargs = {
'mongodb.url': benchmark_spec.mongodb_url,
'mongodb.batchsize': FLAGS.mongodb_batchsize,
'mongodb.upsert': True,
}
samples = list(
benchmark_spec.executor.Run(
benchmark_spec.vm_groups['clients'],
run_kwargs=run_kwargs,
)
)
if FLAGS.mongodb_readahead_kb is not None:
for s in samples:
s.metadata['readahead_kb'] = FLAGS.mongodb_readahead_kb
if hasattr(benchmark_spec, 'mongodb_version'):
s.metadata['mongodb_version'] = benchmark_spec.mongodb_version
mongosh.RunTwoCommands(
benchmark_spec.vm_groups['primary'][0],
'use ycsb',
'db.usertable.stats()',
)
return samples
def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec) -> None:
"""Remove MongoDB and YCSB.
Args:
benchmark_spec: The benchmark specification. Contains all data that is
required to run the benchmark.
"""
def CleanupServer(server: _LinuxVM) -> None:
server.RemoteCommand(
'sudo service %s stop' % server.GetServiceName('mongodb_server')
)
server.RemoteCommand('rm -rf %s' % _GetDataDir(server))
CleanupServer(benchmark_spec.vm_groups['workers'][0])