cpp/profiler/SamplingProfiler.cpp

/**
 * Copyright 2004-present, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "SamplingProfiler.h"
#include "TimerManager.h"

#include <abort_with_reason.h>
#include <errno.h>
#include <pthread.h>
#include <semaphore.h>
#include <setjmp.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>

#include <chrono>
#include <random>
#include <string>

#include <fb/log.h>
#include <fbjni/fbjni.h>

#include <profilo/ExternalApi.h>
#include <profilo/profiler/ExternalTracer.h>
#include <profilo/profiler/JavaBaseTracer.h>
#include <profilo/profiler/Retcode.h>
#include <profilo/profiler/ThreadTimer.h>

#include <profilo/LogEntry.h>
#include <profilo/TraceProviders.h>
#include <profilo/util/common.h>

using namespace facebook::jni;

namespace facebook {
namespace profilo {
namespace profiler {

SamplingProfiler& SamplingProfiler::getInstance() {
  //
  // Despite the fact that this is accessed from a signal handler (this routine
  // is not async-signal safe due to the initialization lock for this
  // variable), this is safe. The first access will always be before the first
  // access from a signal context, so the variable is guaranteed to be
  // initialized by then.
  //
  static SamplingProfiler profiler;
  return profiler;
}

void SamplingProfiler::FaultHandler(
    SignalHandler::HandlerScope scope,
    int signum,
    siginfo_t* siginfo,
    void* ucontext) {
  ProfileState& state = ((SamplingProfiler*)scope.GetData())->state_;

  uint64_t tid = threadID();
  uint64_t targetBusyState = (tid << 16) | StackSlotState::BUSY_WITH_METADATA;

  // Find the most recent slot occupied by this thread.
  // This allows us to handle crashes during nested unwinding from
  // the innermost one out.
  int64_t max_time = -1;
  int max_idx = -1;
  for (int i = 0; i < MAX_STACKS_COUNT; i++) {
    auto& slot = state.stacks[i];
    if (slot.state.load() == targetBusyState && slot.time > max_time) {
      max_time = slot.time;
      max_idx = i;
    }
  }

  if (max_idx >= 0) {
    state.errSigCrashes.fetch_add(1);
    scope.siglongjmp(state.stacks[max_idx].sig_jmp_buf, 1);
  } else {
    scope.CallPreviousHandler(signum, siginfo, ucontext);
  }
}

void SamplingProfiler::maybeSignalReader() {
  uint32_t prevSlotCounter = state_.fullSlotsCounter.fetch_add(1);
  if ((prevSlotCounter + 1) % FLUSH_STACKS_COUNT == 0) {
    int res = sem_post(&state_.slotsCounterSem);
    if (res != 0) {
      abort(); // Something went wrong
    }
  }
}

// Finds the next FREE slot and atomically sets its state to BUSY, so that
// the acquiring thread can safely write to it, and returns the index via
// <outSlot>. Returns true if a FREE slot was found, false otherwise.
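//
// Each slot's state word packs the claiming thread's id together with the
// lifecycle state as (tid << 16) | state, so a single compare-exchange both
// claims the slot and records its owner.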
bool getSlotIndex(ProfileState& state_, uint64_t tid, uint32_t& outSlot) {
  auto slotIndex = state_.currentSlot.fetch_add(1);
  for (int i = 0; i < MAX_STACKS_COUNT; i++) {
    auto nextSlotIndex = (slotIndex + i) % MAX_STACKS_COUNT;
    auto& slot = state_.stacks[nextSlotIndex];
    uint64_t expected = StackSlotState::FREE;
    uint64_t targetBusyState = (tid << 16) | StackSlotState::BUSY;
    if (slot.state.compare_exchange_strong(expected, targetBusyState)) {
      outSlot = nextSlotIndex;
      slot.time = monotonicTime();
      memset(&slot.sig_jmp_buf, 0, sizeof(slot.sig_jmp_buf));

      expected = targetBusyState;
      targetBusyState = (tid << 16) | StackSlotState::BUSY_WITH_METADATA;
      if (!slot.state.compare_exchange_strong(expected, targetBusyState)) {
        abortWithReason(
            "Invariant violation - BUSY to BUSY_WITH_METADATA failed");
      }
      return true;
    }
  }
  // We didn't find an empty slot, so bump our counter
  state_.errSlotMisses.fetch_add(1);
  return false;
}

void SamplingProfiler::UnwindStackHandler(
    SignalHandler::HandlerScope scope,
    int signum,
    siginfo_t* siginfo,
    void* ucontext) {
  SamplingProfiler& profiler = *(SamplingProfiler*)scope.GetData();
  ProfileState& state = profiler.state_;

  uint64_t tid = threadID();
  uint64_t busyState = (tid << 16) | StackSlotState::BUSY_WITH_METADATA;

  for (const auto& tracerEntry : state.tracersMap) {
    auto tracerType = tracerEntry.first;
    if (!(tracerType & state.currentTracers)) {
      continue;
    }
    // The external tracer is frequently disabled, so fail fast here
    // if that is the case.
    if (ExternalTracer::isExternalTracer(tracerType)) {
      if (!static_cast<ExternalTracer*>(tracerEntry.second.get())
               ->isEnabled()) {
        continue;
      }
    }

    uint32_t slotIndex;
    bool slot_found = getSlotIndex(state, tid, slotIndex);
    if (!slot_found) {
      // We're out of slots; no tracer is likely to succeed.
      break;
    }
    auto& slot = state.stacks[slotIndex];

    // Can finally occupy the slot
    if (sigsetjmp(slot.sig_jmp_buf, 1) == 0) {
      memset(slot.method_names, 0, sizeof(slot.method_names));
      memset(slot.class_descriptors, 0, sizeof(slot.class_descriptors));

      uint8_t ret{StackSlotState::FREE};
      if (JavaBaseTracer::isJavaTracer(tracerType)) {
        ret = reinterpret_cast<JavaBaseTracer*>(tracerEntry.second.get())
                  ->collectJavaStack(
                      (ucontext_t*)ucontext,
                      slot.frames,
                      slot.method_names,
                      slot.class_descriptors,
                      slot.depth,
                      MAX_STACK_DEPTH);
      } else {
        ret = tracerEntry.second->collectStack(
            (ucontext_t*)ucontext, slot.frames, slot.depth, MAX_STACK_DEPTH);
      }
      slot.profilerType = tracerType;

      if (StackCollectionRetcode::STACK_OVERFLOW == ret) {
        state.errStackOverflows.fetch_add(1);
      }

      slot.timerType = ThreadTimer::decodeType(siginfo->si_value.sival_int);

      // Ignore TRACER_DISABLED errors for now and free the slot.
      // TODO T42938550
      if (StackCollectionRetcode::TRACER_DISABLED == ret) {
        if (!slot.state.compare_exchange_strong(
                busyState, StackSlotState::FREE)) {
          abortWithReason(
              "Invariant violation - BUSY_WITH_METADATA to FREE failed");
        }
        continue;
      }

      auto nextSlotState = (tid << 16) | ret;
      // If a tracer class handles collection on its own, the slot is
      // freed after the signal is processed.
      if (ret == StackCollectionRetcode::IGNORE) {
        nextSlotState = StackSlotState::FREE;
      }

      if (!slot.state.compare_exchange_strong(busyState, nextSlotState)) {
        // Slot was overwritten by another thread.
        // This is an ordering violation, so abort.
        abortWithReason(
            "Invariant violation - BUSY_WITH_METADATA to return code failed");
      }

      if (nextSlotState != StackSlotState::FREE) {
        profiler.maybeSignalReader();
      }
    } else {
      // We came from the longjmp in FaultHandler.
      // Something must have crashed.
      // Log the error information and bail out.
      slot.time = monotonicTime();
      slot.profilerType = tracerType;
      if (!slot.state.compare_exchange_strong(
              busyState,
              (tid << 16) | StackCollectionRetcode::SIGNAL_INTERRUPT)) {
        abortWithReason(
            "Invariant violation - BUSY_WITH_METADATA to SIGNAL_INTERRUPT failed");
      }
      profiler.maybeSignalReader();
    }
  }
}

void SamplingProfiler::registerSignalHandlers() {
  //
  // Register a handler for SIGPROF.
  //
  // Also, register a handler for SIGSEGV and SIGBUS, so that we can safely
  // jump away in the case of a crash in our SIGPROF handler.
  //
  signal_handlers_.sigprof =
      &SignalHandler::Initialize(SIGPROF, UnwindStackHandler);
  signal_handlers_.sigsegv = &SignalHandler::Initialize(SIGSEGV, FaultHandler);
  signal_handlers_.sigbus = &SignalHandler::Initialize(SIGBUS, FaultHandler);

  signal_handlers_.sigbus->SetData(this);
  signal_handlers_.sigbus->Enable();
  signal_handlers_.sigsegv->SetData(this);
  signal_handlers_.sigsegv->Enable();
  signal_handlers_.sigprof->SetData(this);
  signal_handlers_.sigprof->Enable();
}

void SamplingProfiler::unregisterSignalHandlers() {
  // There are multiple cases we need to worry about:
  // a) currently executing profiling handlers
  // b) pending profiling signals
  // c) currently executing fault handlers
  // d) pending fault signals
  //
  // Observe that fault handlers return to the profiling handler and
  // are conceptually nested within them:
  //   PROF_ENTER
  //     FAULT_ENTER
  //     FAULT_LONGJMP
  //   PROF_EXIT
  //
  // By waiting for all profiling handlers to finish (which Disable does
  // internally), we solve a), c), and d) (pending fault signals during a
  // profiling signal mean we won't exit the corresponding profiling handler
  // until we've handled the fault).
  //
  // We solve b) by never unregistering our signal handler.
  // Once registered, we will bail out on the HandlerScope::IsEnabled check
  // and all will be well on the normal path.
  //
  signal_handlers_.sigprof->Disable();
  signal_handlers_.sigbus->Disable();
  signal_handlers_.sigsegv->Disable();
}

static int32_t
logTimerType(StackSlot& slot, int32_t tid, MultiBufferLogger& logger) {
  auto type = slot.timerType == ThreadTimer::Type::CpuTime
      ? EntryType::CPU_STACK_SAMPLE
      : EntryType::WALL_STACK_SAMPLE;
  auto id = logger.write(StandardEntry{
      .type = type,
      .timestamp = slot.time,
      .tid = tid,
  });
  return id;
}

void SamplingProfiler::flushStackTraces(
    std::unordered_set<uint64_t>& loggedFramesSet) {
  int processedCount = 0;
  auto& logger = *state_.logger;
  for (size_t i = 0; i < MAX_STACKS_COUNT; i++) {
    auto& slot = state_.stacks[i];
    uint64_t slotStateCombo = slot.state.load();
    uint16_t slotState = slotStateCombo & 0xffff;
    if (slotState == StackSlotState::FREE ||
        slotState == StackSlotState::BUSY ||
        slotState == StackSlotState::BUSY_WITH_METADATA) {
      continue;
    }

    // Ignore remains from a previous trace
    if (slot.time > state_.profileStartTime) {
      auto& tracer = state_.tracersMap[slot.profilerType];
      auto tid = slotStateCombo >> 16;
      logTimerType(slot, tid, logger);
      if (StackCollectionRetcode::SUCCESS == slotState) {
        tracer->flushStack(logger, slot.frames, slot.depth, tid, slot.time);
      } else {
        StackCollectionEntryConverter::logRetcode(
            logger, slotState, tid, slot.time, slot.profilerType);
      }

      if (JavaBaseTracer::isJavaTracer(slot.profilerType)) {
        for (int i = 0; i < slot.depth; i++) {
          bool expectedResetState = true;
          if (state_.resetFrameworkSymbols.compare_exchange_strong(
                  expectedResetState, false)) {
            loggedFramesSet.clear();
          }
          if (loggedFramesSet.find(slot.frames[i]) == loggedFramesSet.end() &&
              JavaBaseTracer::isFramework(slot.class_descriptors[i])) {
            StandardEntry entry{};
            entry.tid = tid;
            entry.timestamp = slot.time;
            entry.type = EntryType::JAVA_FRAME_NAME;
            entry.extra = slot.frames[i];
            int32_t id = logger.write(std::move(entry));

            std::string full_name{slot.class_descriptors[i]};
            full_name += slot.method_names[i];
            logger.writeBytes(
                EntryType::STRING_VALUE,
                id,
                (const uint8_t*)full_name.c_str(),
                full_name.length());
          }
          // Mark the frame as "logged" or "visited" so that we don't do a
          // string comparison for it next time, regardless of whether it was
          // a framework frame or not.
          loggedFramesSet.insert(slot.frames[i]);
        }
      }
    }

    uint64_t expected = slotStateCombo;
    // Release the slot
    if (!slot.state.compare_exchange_strong(expected, StackSlotState::FREE)) {
      // Slot was re-used in the middle of processing by another thread.
      // Aborting.
      abort();
    }
    processedCount++;
  }
}

void logProfilingErrAnnotation(
    MultiBufferLogger& logger,
    int32_t key,
    uint16_t value) {
  if (value == 0) {
    return;
  }
  logger.write(StandardEntry{
      .id = 0,
      .type = EntryType::TRACE_ANNOTATION,
      .timestamp = monotonicTime(),
      .tid = threadID(),
      .callid = key,
      .matchid = 0,
      .extra = value,
  });
}

/**
 * Initializes the profiler. Registers a handler for the custom-defined
 * SIGPROF signal, which will collect traces, and initializes the
 * thread/process ids.
 */
bool SamplingProfiler::initialize(
    MultiBufferLogger& logger,
    int32_t available_tracers,
    std::unordered_map<int32_t, std::shared_ptr<BaseTracer>> tracers) {
  state_.processId = getpid();
  state_.logger = &logger;
  state_.availableTracers = available_tracers;
  state_.tracersMap = std::move(tracers);
  state_.timerManager.reset();

  // Init semaphore for flushing stacks to the ring buffer
  int res = sem_init(&state_.slotsCounterSem, 0, 0);
  if (res != 0) {
    FBLOGV("Cannot init slotsCounterSem semaphore: %s", strerror(errno));
    errno = 0;
    return false;
  }
  return true;
}

/**
 * Called via JNI from CPUProfiler.
 *
 * Must only be called if SamplingProfiler::startProfiling() returns true.
 *
 * Waits in a loop for semaphore wakeup and then flushes the current profiling
 * stacks.
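 *
 * Note that sem_wait() can be interrupted by a signal and fail with EINTR,
 * which is why the loop condition below tolerates that case rather than
 * treating every nonzero return as fatal.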
 */
void SamplingProfiler::loggerLoop() {
  FBLOGV("Logger thread %d is going into the loop...", threadID());
  int res = 0;
  std::unordered_set<uint64_t> loggedFramesSet{};
  do {
    res = sem_wait(&state_.slotsCounterSem);
    if (res == 0) {
      flushStackTraces(loggedFramesSet);
    }
  } while (!state_.isLoggerLoopDone && (res == 0 || errno == EINTR));
  FBLOGV("Logger thread is shutting down...");
}

bool SamplingProfiler::startProfilingTimers() {
  FBLOGI("Starting profiling timers w/ sample rate %d", state_.samplingRateMs);
  state_.timerManager.reset(new TimerManager(
      state_.threadDetectIntervalMs,
      state_.samplingRateMs,
      state_.cpuClockModeEnabled,
      state_.wallClockModeEnabled,
      state_.wallClockModeEnabled ? state_.whitelist : nullptr));
  state_.timerManager->start();
  return true;
}

bool SamplingProfiler::stopProfilingTimers() {
  state_.timerManager->stop();
  state_.timerManager.reset();
  return true;
}

bool SamplingProfiler::startProfiling(
    int requested_tracers,
    int sampling_rate_ms,
    int thread_detect_interval_ms,
    bool cpu_clock_mode_enabled,
    bool wall_clock_mode_enabled) {
  if (state_.isProfiling) {
    throw std::logic_error("startProfiling called while already profiling");
  }
  state_.isProfiling = true;
  FBLOGV("Start profiling");

  registerSignalHandlers();

  state_.profileStartTime = monotonicTime();
  state_.currentTracers = state_.availableTracers & requested_tracers;
  if (state_.currentTracers == 0) {
    return false;
  }

  constexpr auto kMinThreadDetectIntervalMs = 7; // TODO_YM T63620953
  if (thread_detect_interval_ms < kMinThreadDetectIntervalMs) {
    thread_detect_interval_ms = kMinThreadDetectIntervalMs;
  }
  state_.samplingRateMs = sampling_rate_ms;
  state_.cpuClockModeEnabled = cpu_clock_mode_enabled;
  state_.wallClockModeEnabled = wall_clock_mode_enabled;
  state_.threadDetectIntervalMs = thread_detect_interval_ms;
  state_.isLoggerLoopDone = false;

  for (const auto& tracerEntry : state_.tracersMap) {
    if (tracerEntry.first & state_.currentTracers) {
      tracerEntry.second->startTracing();
    }
  }

  return startProfilingTimers();
}

/**
 * Stops the profiler and writes the collected stack traces to Profilo.
 * Each value written is a 64-bit <method_id, dex_number> pair.
 * Unfortunately, neither DvmDex nor DvmHeader contains a unique dex number
 * that we could reuse. Until one is possibly written custom by redex, we'll
 * use the dex checksum for identification, which should rarely collide.
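 *
 * Stopping also posts slotsCounterSem once so that loggerLoop() wakes up,
 * observes isLoggerLoopDone, and shuts down.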
 */
void SamplingProfiler::stopProfiling() {
  if (!state_.isProfiling) {
    throw std::logic_error("stopProfiling called while not profiling");
  }
  FBLOGV("Stopping profiling");
  if (!stopProfilingTimers()) {
    abort();
  }

  state_.isLoggerLoopDone.store(true);
  int res = sem_post(&state_.slotsCounterSem);
  if (res != 0) {
    FBLOGV("Cannot execute sem_post for logger thread");
    errno = 0;
  }

  // Logging errors
  logProfilingErrAnnotation(
      *state_.logger,
      QuickLogConstants::PROF_ERR_SIG_CRASHES,
      state_.errSigCrashes);
  logProfilingErrAnnotation(
      *state_.logger,
      QuickLogConstants::PROF_ERR_SLOT_MISSES,
      state_.errSlotMisses);
  logProfilingErrAnnotation(
      *state_.logger,
      QuickLogConstants::PROF_ERR_STACK_OVERFLOWS,
      state_.errStackOverflows);
  FBLOGV(
      "Stack overflows = %d, Sig crashes = %d, Slot misses = %d",
      state_.errStackOverflows.load(),
      state_.errSigCrashes.load(),
      state_.errSlotMisses.load());
  state_.currentSlot = 0;
  state_.errSigCrashes = 0;
  state_.errSlotMisses = 0;
  state_.errStackOverflows = 0;

  for (const auto& tracerEntry : state_.tracersMap) {
    if (tracerEntry.first & state_.currentTracers) {
      tracerEntry.second->stopTracing();
    }
  }

  unregisterSignalHandlers();

  state_.isProfiling = false;
}

void SamplingProfiler::addToWhitelist(int targetThread) {
  std::unique_lock<std::mutex> lock(state_.whitelist->whitelistedThreadsMtx);
  state_.whitelist->whitelistedThreads.insert(
      static_cast<int32_t>(targetThread));
}

void SamplingProfiler::removeFromWhitelist(int targetThread) {
  std::unique_lock<std::mutex> lock(state_.whitelist->whitelistedThreadsMtx);
  state_.whitelist->whitelistedThreads.erase(targetThread);
}

void SamplingProfiler::resetFrameworkNamesSet() {
  // Let the logger loop know we should reset our cache of frames
  state_.resetFrameworkSymbols.store(true);
}

} // namespace profiler
} // namespace profilo
} // namespace facebook
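
//
// Illustrative lifecycle sketch (not part of this file's API surface; names
// like `logger` and `tracers` and the numeric arguments below are
// hypothetical placeholders). Per the doc comments above, the expected call
// sequence is roughly:
//
//   auto& profiler =
//       facebook::profilo::profiler::SamplingProfiler::getInstance();
//   profiler.initialize(logger, available_tracers, std::move(tracers));
//   if (profiler.startProfiling(
//           requested_tracers,
//           /* sampling_rate_ms */ 10,
//           /* thread_detect_interval_ms */ 23,
//           /* cpu_clock_mode_enabled */ true,
//           /* wall_clock_mode_enabled */ false)) {
//     // A dedicated thread (driven via JNI from CPUProfiler) then runs
//     // profiler.loggerLoop() until stopProfiling() signals it to exit.
//   }
//   profiler.stopProfiling();
//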