EventList PerCoreAttachmentStrategy::attach()

in cpp/perfevents/detail/AttachmentStrategy.cpp [51:162]
71 lines of code
16 McCabe index (conditional complexity)

// Opens perf events for the current set of threads and wires them up so that
// every core has exactly one mmap-ed "leader" event which all other events on
// that core output into.
//
// The attachment loop runs for at most `max_iterations_` counted iterations.
// Each iteration reads the thread list, opens the events produced by
// eventsForDelta() for the previous and current thread lists, and - when there
// is at least one process-wide spec (`global_specs_ > 0`) - re-reads the
// thread list to check that it did not change while we were attaching.
//
// Returns the list of opened events on success. Returns an empty list when
// the loop ran out of iterations without succeeding, or when it succeeded
// but no event ended up being opened.
//
// Throws:
//  - std::system_error (rethrown from Event::open()) if opening fails for a
//    thread that is still present in the thread list.
//  - std::logic_error if events were opened but some core did not get an
//    output event.
//  - Anything thrown by mmap()/setOutput() propagates unchanged.
EventList PerCoreAttachmentStrategy::attach() {
  // The list from the previous iteration of the attachment loop,
  // used to calculate the delta from attempt to attempt.
  auto prev_tids = ThreadList();

  // The final list of event objects.
  auto perf_events = EventList();
  bool success = false;

  // The first event on every core becomes the output for all
  // other events on this core. We store their indices into perf_events here.
  // (It's kinda silly but it saves us from using shared_ptr everywhere)
  auto cpu_output_idxs = std::vector<size_t>(getCoreCount());
  auto has_cpu_output = std::vector<bool>(getCoreCount());

  for (int32_t iter = 0; iter < max_iterations_; iter++) {
    auto tids = threadListFromProcFs();
    if (!isWithinLimits(tids.size())) {
      if (tryFallbacks()) {
        // Don't count fallbacks as an attachment iteration: this decrement
        // cancels out the loop's own increment on `continue`.
        iter--;
      }
      continue; // try again
    }

    auto events = eventsForDelta(prev_tids, tids);
    for (auto& evt : events) {
      try {
        evt.open();
      } catch (const std::system_error&) {
        // Check for a missing thread: re-read the thread list and see
        // whether the thread we failed on is still there.
        auto current_tids = threadListFromProcFs();
        if (current_tids.find(evt.tid()) == current_tids.end()) {
          // Thread is no longer alive, allow this failure. The dead thread
          // remains in `tids`, see comment at the end of the loop.
          continue;
        }
        // We don't know what's wrong, rethrow the original exception.
        throw;
      }

      perf_events.push_back(std::move(evt));
      size_t last_idx = perf_events.size() - 1;

      // evt is gone now, get a reference to the Event in the list
      auto& list_evt = perf_events.at(last_idx);
      int32_t cpu = list_evt.cpu();
      if (!has_cpu_output[cpu]) {
        // First event on each cpu becomes the "cpu output" - all subsequent
        // events on this core will be redirected to it.
        cpu_output_idxs[cpu] = last_idx;
        has_cpu_output[cpu] = true;
      }
    }

    if (global_specs_ <= 0) {
      // We are attaching to specific threads and that's all best effort.
      // We don't care if any of the threads suddenly disappeared.
      success = true;
      break;
    }

    // We have at least one process-wide event, so we care about attaching to
    // all currently running threads. Get the thread list again and confirm
    // it hasn't changed.
    auto end_tids = threadListFromProcFs();
    if (tids == end_tids) {
      // Same list, reached a fixed point, we're done here.
      success = true;
      break;
    }

    // Things changed, record the last list we worked with and try again.
    //
    // It doesn't matter that prev_tids potentially contains threads which
    // are no longer alive (see try-catch above) - that's only a problem
    // if the dead thread's tid is reused and we get a false positive.
    // The chances of tid reuse within two iterations of this loop
    // are infinitesimal.
    prev_tids = std::move(tids);
  }

  if (!success) {
    return EventList();
  }

  if (perf_events.empty()) {
    // Nothing was opened (e.g. every open() failed on a thread that had
    // already exited). There are no cpu leaders to mmap; indexing into the
    // empty list below would throw std::out_of_range, so hand back the empty
    // list instead.
    return perf_events;
  }

  // The buffer size must be 1 + 2^n number of pages.
  // We choose 512KB + 1 page, should be enough for everyone (TM).
  // (In practice, I see 1MB + 1 page failing with EPERM).
  static constexpr auto kBufferPerCoreSz = (1 + 128) * 4096;

  // mmap the cpu leaders and redirect all other events to them.
  for (int cpu = 0; cpu < getCoreCount(); ++cpu) {
    if (!has_cpu_output[cpu]) {
      throw std::logic_error(
          "Succeeded but did not assign a CPU output event for all cores");
    }
    perf_events.at(cpu_output_idxs[cpu]).mmap(kBufferPerCoreSz);
  }

  for (auto& evt : perf_events) {
    // Skip the cpu leaders: they are the only events that already have a
    // buffer after the mmap pass above.
    if (evt.buffer() != nullptr) {
      continue;
    }
    auto& cpu_evt = perf_events.at(cpu_output_idxs[evt.cpu()]);
    evt.setOutput(cpu_evt);
  }

  return perf_events;
}