perfkitbenchmarker/traces/mongo_diagnostics.py (83 lines of code) (raw):
# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Collects Mongo diagnostic files for analysis.
No samples will be published.
"""
import logging
import os
import uuid
from absl import flags
from perfkitbenchmarker import events
from perfkitbenchmarker import stages
from perfkitbenchmarker import vm_util
from perfkitbenchmarker.traces import base_collector
# Opt-in switch for this collector; Register() below is a no-op unless it is
# set. The help text describes what the collector actually downloads (the
# previous text was copied from the sar collector and described sar).
flags.DEFINE_boolean(
    'mongo_diagnostics',
    False,
    'Download the MongoDB FTDC diagnostic.data directory and mongod.log '
    'from each VM for each benchmark run, for offline analysis. '
    'No samples are published.',
)
FLAGS = flags.FLAGS
class _MongoDiagnosticsCollector(base_collector.BaseCollector):
  """Mongo diagnostics collector for manual analysis.

  Nothing meaningful runs on the VM while the benchmark executes; the run
  command is a placeholder. The real work happens in _CollectorPostProcess,
  which downloads the Mongo FTDC directory and mongod.log from the VM.
  """

  def _CollectorName(self):
    """Returns the collector name, also used in downloaded file names."""
    return 'mongo_diagnostics'

  def _InstallCollector(self, vm):
    """No-op: this collector installs nothing on the VM."""
    pass

  def _CollectorRunCommand(self, vm, collector_file):
    """Returns a placeholder shell command for the run phase.

    The command echoes a fixed message into collector_file as a background
    job and then prints that job's pid via `$!`.
    NOTE(review): presumably BaseCollector requires a pid on stdout so it
    can stop the process later -- confirm against base_collector.

    Args:
      vm: The VM the command is built for (unused).
      collector_file: Remote path that receives the placeholder output.

    Returns:
      The shell command string.
    """
    return 'echo "nothing to run" &>{output} & echo $!'.format(
        output=collector_file,
    )

  @staticmethod
  def _RunMongoshCommand(vm, command: str) -> tuple[str, str]:
    """Runs a mongosh command on the VM.

    The command is interpolated inside double quotes without escaping, so
    it must only be called with fixed, trusted snippets.

    Args:
      vm: The VM to run mongosh on.
      command: JavaScript snippet passed to `mongosh --eval`.

    Returns:
      Whatever vm.RemoteCommand returns for the invocation.
    """
    return vm.RemoteCommand(f'mongosh --eval "{command}" --verbose')

  def _CollectorPostProcess(self, vm):
    """Downloads Mongo diagnostic artifacts from the VM, best effort.

    Takes db.fsyncLock() so the files can be copied safely, pulls the FTDC
    directory and mongod.log into self.output_directory, and always releases
    the lock again if it was acquired. All failures are logged rather than
    raised, since failures are expected on client VMs.

    Args:
      vm: The VM to collect diagnostics from.
    """
    # A short random component keeps downloads from separate invocations
    # from overwriting each other.
    prefix = '{0}-{1}-{2}-'.format(
        vm.name, str(uuid.uuid4())[:8], self._CollectorName()
    )
    # Only set once fsyncLock has succeeded, so we never unlock a lock we
    # did not take.
    fsync_locked = False
    try:
      # Step PREPARE: run fsyncLock to flush and allow safe copies
      # refer:
      # https://www.mongodb.com/docs/manual/reference/method/db.fsyncLock/
      try:
        self._RunMongoshCommand(vm, 'db.fsyncLock()')
        fsync_locked = True
      except Exception:  # pylint: disable=broad-except
        logging.exception(
            'Failed running db.fsyncLock() on %s. This is expected on client'
            ' VMs, and on server VMs if Mongo has crashed. We still want to'
            ' collect the diagnostic data for analysis.',
            vm.name,
        )

      # Step 1: download the Mongo FTDC dir
      # refer:
      # https://alexbevi.com/blog/2020/01/26/what-is-mongodb-ftdc-aka-diagnostic-dot-data
      data_folder = '/scratch/mongodb-data'
      diag_data_folder_name = 'diagnostic.data'
      diag_data_folder_path = f'{data_folder}/{diag_data_folder_name}'
      # need read access
      vm.RemoteCommand(f'sudo chmod -R 755 {data_folder}')
      vm.PullFile(
          f'{self.output_directory}/{prefix}{diag_data_folder_name}',
          diag_data_folder_path,
      )

      # Step 2: capture mongo.conf in log output, no need to download
      vm.RemoteCommand('cat /etc/mongod.conf')

      # Step 3: download mongodb.log for analysis
      log_file_name = 'mongod.log'
      log_file_path = f'/var/log/mongodb/{log_file_name}'
      vm.RemoteCommand(f'sudo chmod 755 {log_file_path}')
      vm.PullFile(
          f'{self.output_directory}/{prefix}{log_file_name}',
          log_file_path,
      )
    except Exception:  # pylint: disable=broad-except
      logging.exception(
          'Failed fetching Mongo diagnostics from %s. This is expected on'
          ' client VMs.',
          vm.name,
      )
    finally:
      # Step CLEANUP: unlock fsyncLock. This lives in `finally` so that a
      # failed chmod/pull above cannot leave mongod blocking writes.
      if fsync_locked:
        try:
          self._RunMongoshCommand(vm, 'db.fsyncUnlock()')
        except Exception:  # pylint: disable=broad-except
          logging.exception(
              'Failed running db.fsyncUnlock() on %s. Mongo may still be'
              ' locked against writes.',
              vm.name,
          )
def Register(parsed_flags):
  """Registers the Mongo diagnostics collector if --mongo_diagnostics is set.

  When enabled, creates the output directory if needed and connects the
  collector's Start and Stop methods to the before/after events of the RUN
  stage.

  Args:
    parsed_flags: Parsed flag values; only `mongo_diagnostics` is read.
  """
  if not parsed_flags.mongo_diagnostics:
    return

  output_directory = vm_util.GetTempDir()
  logging.debug(
      'Registering mongo_diagnostics collector output to %s.',
      output_directory,
  )
  # exist_ok avoids the check-then-create race of a separate isdir() test.
  os.makedirs(output_directory, exist_ok=True)

  collector = _MongoDiagnosticsCollector(output_directory=output_directory)
  # weak=False: a strong reference is requested because nothing else holds
  # on to `collector` once this function returns.
  events.before_phase.connect(collector.Start, stages.RUN, weak=False)
  events.after_phase.connect(collector.Stop, stages.RUN, weak=False)