perfkitbenchmarker/traces/mongo_diagnostics.py (83 lines of code) (raw):

# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Collects Mongo diagnostic files for analysis.

No samples will be published.
"""

import logging
import os
import uuid

from absl import flags
from perfkitbenchmarker import events
from perfkitbenchmarker import stages
from perfkitbenchmarker import vm_util
from perfkitbenchmarker.traces import base_collector

flags.DEFINE_boolean(
    'mongo_diagnostics',
    False,
    'Download the Mongo diagnostic files (the FTDC diagnostic.data directory '
    'and mongod.log) from each VM after each benchmark run for offline '
    'analysis.',
)
FLAGS = flags.FLAGS

# Locations on the VM of the files this collector reads.
_MONGO_DATA_DIR = '/scratch/mongodb-data'
_DIAGNOSTIC_DIR_NAME = 'diagnostic.data'
_MONGO_CONF_PATH = '/etc/mongod.conf'
_MONGO_LOG_DIR = '/var/log/mongodb'
_MONGO_LOG_NAME = 'mongod.log'


def _RunMongoshCommand(vm, command: str) -> tuple[str, str]:
  """Runs a mongosh command on the VM and returns RemoteCommand's result."""
  return vm.RemoteCommand(f'mongosh --eval "{command}" --verbose')


class _MongoDiagnosticsCollector(base_collector.BaseCollector):
  """Mongo diagnostics collector for manual analysis.

  Nothing is installed on the VMs and the run command is a no-op. All the
  work happens in _CollectorPostProcess, which copies the Mongo diagnostic
  files from the VM into the collector's output directory.
  """

  def _CollectorName(self):
    """Returns the name used to label this collector's output files."""
    return 'mongo_diagnostics'

  def _InstallCollector(self, vm):
    """Does nothing: this collector needs no extra packages on the VM."""
    pass

  def _CollectorRunCommand(self, vm, collector_file):
    """Returns a no-op shell command that still prints a background pid.

    Args:
      vm: The VM the command will run on (unused).
      collector_file: Remote path that receives the placeholder output.

    Returns:
      A shell command that runs an `echo` in the background, redirects its
      output to collector_file, and prints the background job's pid.
    """
    # Nothing has to run during the benchmark; the command only keeps the
    # "start in background, print pid" shape of a real collector command.
    # NOTE(review): presumably the base collector reads that pid back;
    # confirm against base_collector.BaseCollector.
    return 'echo "nothing to run" &>{output} & echo $!'.format(
        output=collector_file
    )

  def _CollectorPostProcess(self, vm):
    """Downloads the Mongo diagnostic files from the VM.

    Best effort: every failure is logged and swallowed, because the
    collector also runs on VMs without Mongo (e.g. client VMs).

    Args:
      vm: The VM to fetch the diagnostics from.
    """
    # A short random component keeps downloads from different invocations
    # from overwriting each other in the shared output directory.
    prefix = '{0}-{1}-{2}-'.format(
        vm.name, str(uuid.uuid4())[:8], self._CollectorName()
    )
    try:
      # Step PREPARE: run fsyncLock to flush and allow safe copies
      # refer:
      # https://www.mongodb.com/docs/manual/reference/method/db.fsyncLock/
      locked = True
      try:
        _RunMongoshCommand(vm, 'db.fsyncLock()')
      except Exception:  # pylint: disable=broad-except
        locked = False
        logging.exception(
            'Failed running db.fsyncLock() on %s. This is expected on client'
            ' VMs, and on server VMs if Mongo has crashed. We still want to'
            ' collect the diagnostic data for analysis.',
            vm.name,
        )
      try:
        self._PullDiagnosticFiles(vm, prefix)
      finally:
        # Step CLEANUP: always release the lock we took, even when a copy
        # step failed; otherwise mongod would stay blocked for writes.
        if locked:
          _RunMongoshCommand(vm, 'db.fsyncUnlock()')
    except Exception:  # pylint: disable=broad-except
      logging.exception(
          'Failed fetching Mongo diagnostics from %s. This is expected on'
          ' client VMs.',
          vm.name,
      )

  def _PullDiagnosticFiles(self, vm, prefix):
    """Copies the FTDC directory and mongod.log, and logs mongod.conf.

    Args:
      vm: The VM to copy the files from.
      prefix: File name prefix for everything written to the output directory.
    """
    # Step 1: download the Mongo FTDC dir
    # refer:
    # https://alexbevi.com/blog/2020/01/26/what-is-mongodb-ftdc-aka-diagnostic-dot-data
    # need read access
    vm.RemoteCommand(f'sudo chmod -R 755 {_MONGO_DATA_DIR}')
    vm.PullFile(
        f'{self.output_directory}/{prefix}{_DIAGNOSTIC_DIR_NAME}',
        f'{_MONGO_DATA_DIR}/{_DIAGNOSTIC_DIR_NAME}',
    )

    # Step 2: capture mongo.conf in log output, no need to download
    vm.RemoteCommand(f'cat {_MONGO_CONF_PATH}')

    # Step 3: download mongodb.log for analysis
    log_file_path = f'{_MONGO_LOG_DIR}/{_MONGO_LOG_NAME}'
    vm.RemoteCommand(f'sudo chmod 755 {log_file_path}')
    vm.PullFile(
        f'{self.output_directory}/{prefix}{_MONGO_LOG_NAME}',
        log_file_path,
    )


def Register(parsed_flags):
  """Registers the Mongo diagnostics collector if --mongo_diagnostics is set.

  Args:
    parsed_flags: Parsed flag values; only mongo_diagnostics is read.
  """
  if not parsed_flags.mongo_diagnostics:
    return

  output_directory = vm_util.GetTempDir()
  logging.debug(
      'Registering mongo_diagnostics collector output to %s.',
      output_directory,
  )
  os.makedirs(output_directory, exist_ok=True)

  collector = _MongoDiagnosticsCollector(output_directory=output_directory)
  # weak=False makes the signals hold strong references to the bound methods,
  # so the collector stays alive after this function returns.
  events.before_phase.connect(collector.Start, stages.RUN, weak=False)
  events.after_phase.connect(collector.Stop, stages.RUN, weak=False)