// cpp/perfevents/detail/AttachmentStrategy.cpp
/**
* Copyright 2004-present, Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <profilo/perfevents/detail/AttachmentStrategy.h>

#include <sys/resource.h>
#include <unistd.h>

#include <stdexcept>
#include <system_error>
#include <vector>

namespace facebook {
namespace perfevents {
namespace detail {
static bool tryRaiseFdLimit();
static int getCoreCount();
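
// Number of configured cores, cached in a function-local static.
// _SC_NPROCESSORS_CONF also counts cores that are currently offline,
// so events get set up even for cores that may only come online later.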
static int getCoreCount() {
static const int kNumCores = sysconf(_SC_NPROCESSORS_CONF);
return kNumCores;
}
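
// Rough usage sketch (hedged: the concrete spec contents and parameter values
// below are illustrative assumptions, not a documented configuration):
//
//   EventSpecList specs = ...; // e.g. one process-wide spec per counter type
//   PerCoreAttachmentStrategy strategy(
//       specs,
//       FALLBACK_RAISE_RLIMIT,        // fallbacks we are allowed to use
//       /*max_iterations=*/5,
//       /*open_fds_limit_ratio=*/0.8f);
//   EventList events = strategy.attach(); // empty list means attachment failed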
PerCoreAttachmentStrategy::PerCoreAttachmentStrategy(
const EventSpecList& specs,
uint32_t fallbacks,
uint16_t max_iterations,
float open_fds_limit_ratio)
: specs_(specs), // copy
global_specs_(0),
fallbacks_(fallbacks),
used_fallbacks_(0),
max_iterations_(max_iterations),
open_fds_limit_ratio_(open_fds_limit_ratio) {
size_t global_events = 0; // process-wide events
for (auto& spec : specs) {
if (spec.isProcessWide()) {
++global_events;
}
}
global_specs_ = global_events;
}
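
// Attachment runs as a fixed-point iteration over the process's thread list:
// snapshot the thread list, open events for any threads not seen in the
// previous snapshot, then re-read the list. With process-wide specs we only
// declare success once two consecutive snapshots are identical (or we hit
// max_iterations_); with thread-specific specs a single pass is enough.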
EventList PerCoreAttachmentStrategy::attach() {
// The list from the previous iteration of the attachment loop,
// used to calculate the delta from attempt to attempt.
auto prev_tids = ThreadList();
// The final list of event objects.
auto perf_events = EventList();
bool success = false;
// The first event on every core becomes the output for all
// other events on this core. We store their indices into perf_events here.
// (It's kinda silly but it saves us from using shared_ptr everywhere)
auto cpu_output_idxs = std::vector<size_t>(getCoreCount());
auto has_cpu_output = std::vector<bool>(getCoreCount());
for (int32_t iter = 0; iter < max_iterations_; iter++) {
auto tids = threadListFromProcFs();
if (!isWithinLimits(tids.size())) {
if (tryFallbacks()) {
iter--; // don't count fallbacks as an attachment iteration
}
continue; // try again
}
auto events = eventsForDelta(prev_tids, tids);
for (auto& evt : events) {
try {
evt.open();
} catch (std::system_error& ex) {
// check for missing thread
auto current_tids = threadListFromProcFs();
auto no_result = current_tids.end();
if (current_tids.find(evt.tid()) == no_result) {
// Thread is no longer alive, allow this failure. The dead thread
// remains in `tids`, see comment at the end of the loop.
continue;
} else {
// We don't know what's wrong, rethrow.
throw;
}
}
perf_events.push_back(std::move(evt));
size_t last_idx = perf_events.size() - 1;
// evt is gone now, get a reference to the Event in the list
auto& list_evt = perf_events.at(last_idx);
int32_t cpu = list_evt.cpu();
if (!has_cpu_output[cpu]) {
// First event on each cpu becomes the "cpu output" - all subsequent
// events on this core will be redirected to it.
cpu_output_idxs[cpu] = last_idx;
has_cpu_output[cpu] = true;
}
}
// If we have at least one process-wide event, we care about attaching to
// all currently running threads.
if (global_specs_ > 0) {
// Get the thread list again and confirm it hasn't changed.
auto end_tids = threadListFromProcFs();
if (tids == end_tids) {
// Same list, reached a fixed point, we're done here.
success = true;
break;
} else {
// Things changed, record the last list we worked with and try again.
//
// It doesn't matter that prev_tids potentially contains threads which
// are no longer alive (see try-catch above) - that's only a problem
// if the dead thread's tid is reused and we get a false positive.
// The chances of tid reusal within two iterations of this loop
// are infinitesimal.
prev_tids = std::move(tids);
continue;
}
} else {
// We are attaching to specific threads and that's all best effort.
// We don't care if any of the threads suddenly disappeared.
success = true;
break;
}
}
if (success) {
// mmap the cpu leaders and redirect all other events to them.
for (int cpu = 0; cpu < getCoreCount(); ++cpu) {
if (!perf_events.empty() && !has_cpu_output[cpu]) {
throw std::logic_error(
"Succeeded but did not assign a CPU output event for all cores");
}
// The buffer size must be 1 + 2^n number of pages.
// We choose 512KB + 1 page, should be enough for everyone (TM).
// (In practice, I see 1MB + 1 page failing with EPERM).
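      // 128 data pages (2^7) of 4096 bytes each = 512KB, plus the metadata page.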
static constexpr auto kBufferPerCoreSz = (1 + 128) * 4096;
perf_events.at(cpu_output_idxs[cpu]).mmap(kBufferPerCoreSz);
}
for (auto& evt : perf_events) {
// skip the cpu leaders
if (evt.buffer() != nullptr) {
continue;
}
auto& cpu_evt = perf_events.at(cpu_output_idxs[evt.cpu()]);
evt.setOutput(cpu_evt);
}
return perf_events;
} else {
return EventList();
}
}
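
// Return the set of tids that appear in `tids` but not in `prev_tids`.
// An empty `prev_tids` means every thread in `tids` is considered new.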
static ThreadList computeDelta(
const ThreadList& prev_tids,
const ThreadList& tids) {
if (prev_tids.empty()) {
return tids;
} else {
auto delta = ThreadList();
auto no_result = prev_tids.end();
for (auto& tid : tids) {
if (prev_tids.find(tid) == no_result) {
delta.emplace(tid);
}
}
return delta;
}
}

//
// Build a list of Event objects for all threads in `tids` but not in
// `prev_tids`. If `prev_tids` is empty, this will build a list of events
// for every thread in `tids`.
//
EventList PerCoreAttachmentStrategy::eventsForDelta(
const ThreadList& prev_tids,
const ThreadList& tids) const {
auto delta = computeDelta(prev_tids, tids);
auto events = EventList();
for (auto& spec : specs_) {
for (int32_t cpu = 0; cpu < getCoreCount(); cpu++) {
// one event per core
if (spec.isProcessWide()) {
for (auto& tid : delta) {
// per thread we know about too
events.emplace_back(spec.type, tid, cpu, true /*inherit*/);
}
} else {
// We're targeting a specific thread but we still
// need one event per core.
events.emplace_back(spec.type, spec.tid, cpu, false /*inherit*/);
}
}
}
return events;
}
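
// Estimate how many perf event fds attach() would create for `tids_count`
// threads (one per core per thread for every process-wide spec, plus one per
// core for every thread-specific spec) and check that the resulting fd count
// stays below open_fds_limit_ratio_ of the current RLIMIT_NOFILE soft limit.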
bool PerCoreAttachmentStrategy::isWithinLimits(size_t tids_count) {
auto specific_specs = specs_.size() - global_specs_;
auto fds = fdListFromProcFs();
auto fds_count = fds.size();
auto coreCount = getCoreCount();
// number of fds we'll add
auto estimate_new_fds =
tids_count * coreCount * global_specs_ + // process-global
coreCount * specific_specs; // specific threads
// estimated final count
auto estimate_fds_count = fds_count + estimate_new_fds;
auto max_fds = getrlimit(RLIMIT_NOFILE);
auto internal_limit = open_fds_limit_ratio_ * max_fds.rlim_cur;
return estimate_fds_count <= internal_limit;
}
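
// Apply the next enabled fallback that hasn't been used yet. Currently the
// only fallback is raising the fd soft limit. Returns true if a fallback was
// applied and the caller should re-check the limits.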
bool PerCoreAttachmentStrategy::tryFallbacks() {
if ((fallbacks_ & FALLBACK_RAISE_RLIMIT) &&
((used_fallbacks_ & FALLBACK_RAISE_RLIMIT) == 0) && tryRaiseFdLimit()) {
used_fallbacks_ |= FALLBACK_RAISE_RLIMIT;
return true;
}
return false;
}

// Raise the soft file descriptor limit up to the hard limit.
// Returns true if the limit was actually raised,
// false if the limits were already equal or the request failed.
static bool tryRaiseFdLimit() {
try {
auto limits = getrlimit(RLIMIT_NOFILE);
if (limits.rlim_cur == limits.rlim_max) {
// Soft limit is already up to the hard limit
return false;
}
// Raise the soft limit up to the hard limit
auto new_limits = rlimit{limits.rlim_max, limits.rlim_max};
setrlimit(RLIMIT_NOFILE, new_limits);
// Check if we actually succeeded. If we didn't, we'd keep trying on every
// attachment iteration but that's okay.
new_limits = getrlimit(RLIMIT_NOFILE);
return new_limits.rlim_cur == limits.rlim_max;
} catch (std::system_error& ex) {
return false;
}
}
} // namespace detail
} // namespace perfevents
} // namespace facebook