in cpp/perfevents/detail/AttachmentStrategy.cpp [51:162]
// Opens perf events for the threads currently listed in procfs and wires
// them up so that every core has a single mmap'ed "output" event which all
// other events on that core are redirected to.
//
// The thread list is re-read on every iteration (at most max_iterations_
// counted iterations); only events for the delta between consecutive thread
// lists are opened. When process-wide specs are present (global_specs_ > 0),
// the loop repeats until the thread list is unchanged across an iteration.
//
// Returns the list of opened events on success (possibly empty if no event
// could be opened), or an empty list if no attempt succeeded.
//
// Throws:
//   std::system_error - rethrown from Event::open() when the failing event's
//                       thread is still present in procfs.
//   std::logic_error  - if events were opened but some core ended up without
//                       an output event.
EventList PerCoreAttachmentStrategy::attach() {
  // The list from the previous iteration of the attachment loop,
  // used to calculate the delta from attempt to attempt.
  auto prev_tids = ThreadList();
  // The final list of event objects.
  auto perf_events = EventList();
  bool success = false;

  // The first event on every core becomes the output for all
  // other events on this core. We store their indices into perf_events here.
  // (It's kinda silly but it saves us from using shared_ptr everywhere)
  auto cpu_output_idxs = std::vector<size_t>(getCoreCount());
  auto has_cpu_output = std::vector<bool>(getCoreCount());

  for (int32_t iter = 0; iter < max_iterations_; iter++) {
    auto tids = threadListFromProcFs();
    if (!isWithinLimits(tids.size())) {
      if (tryFallbacks()) {
        iter--; // don't count fallbacks as an attachment iteration
      }
      continue; // try again
    }

    auto events = eventsForDelta(prev_tids, tids);
    for (auto& evt : events) {
      try {
        evt.open();
      } catch (const std::system_error&) {
        // Check for a missing thread.
        auto current_tids = threadListFromProcFs();
        if (current_tids.find(evt.tid()) == current_tids.end()) {
          // Thread is no longer alive, allow this failure. The dead thread
          // remains in `tids`, see comment at the end of the loop.
          continue;
        }
        // We don't know what's wrong, rethrow.
        throw;
      }

      perf_events.push_back(std::move(evt));
      size_t last_idx = perf_events.size() - 1;
      // evt is gone now, get a reference to the Event in the list
      auto& list_evt = perf_events.at(last_idx);
      int32_t cpu = list_evt.cpu();
      if (!has_cpu_output[cpu]) {
        // First event on each cpu becomes the "cpu output" - all subsequent
        // events on this core will be redirected to it.
        cpu_output_idxs[cpu] = last_idx;
        has_cpu_output[cpu] = true;
      }
    }

    if (global_specs_ <= 0) {
      // We are attaching to specific threads and that's all best effort.
      // We don't care if any of the threads suddenly disappeared.
      success = true;
      break;
    }

    // We have at least one process-wide event, so we care about attaching to
    // all currently running threads. Get the thread list again and confirm
    // it hasn't changed.
    auto end_tids = threadListFromProcFs();
    if (tids == end_tids) {
      // Same list, reached a fixed point, we're done here.
      success = true;
      break;
    }

    // Things changed, record the last list we worked with and try again.
    //
    // It doesn't matter that prev_tids potentially contains threads which
    // are no longer alive (see try-catch above) - that's only a problem
    // if the dead thread's tid is reused and we get a false positive.
    // The chances of tid reuse within two iterations of this loop
    // are infinitesimal.
    prev_tids = std::move(tids);
  }

  if (!success) {
    return EventList();
  }

  if (perf_events.empty()) {
    // Nothing was opened (e.g. every targeted thread died before open()).
    // There are no cpu leaders to mmap; indexing perf_events below would
    // be out of range, so hand back the empty list instead.
    return perf_events;
  }

  // The buffer size must be 1 + 2^n number of pages.
  // We choose 512KB + 1 page, should be enough for everyone (TM).
  // (In practice, I see 1MB + 1 page failing with EPERM).
  static constexpr auto kBufferPerCoreSz = (1 + 128) * 4096;

  // mmap the cpu leaders and redirect all other events to them.
  for (int cpu = 0; cpu < getCoreCount(); ++cpu) {
    if (!has_cpu_output[cpu]) {
      throw std::logic_error(
          "Succeeded but did not assign a CPU output event for all cores");
    }
    perf_events.at(cpu_output_idxs[cpu]).mmap(kBufferPerCoreSz);
  }

  for (auto& evt : perf_events) {
    // Skip the cpu leaders: they already own a buffer from the mmap above.
    if (evt.buffer() != nullptr) {
      continue;
    }
    auto& cpu_evt = perf_events.at(cpu_output_idxs[evt.cpu()]);
    evt.setOutput(cpu_evt);
  }
  return perf_events;
}