BaseKillPlugin::KillResult BaseKillPlugin::resumeTryingToKillSomething()

in src/oomd/plugins/BaseKillPlugin.cpp [246:388]


// Works through next_best_option_stack (the back of the vector is the top of
// the stack) until a kill succeeds, a pre-kill hook has to be waited on, or
// the stack runs out.
//
// For every candidate popped off the stack:
//  - if recursive_ is set, the cgroup is not flagged oom_group, and it has
//    children, the ranked children are pushed in its place and the loop
//    moves on to the best child;
//  - an unpopulated cgroup is skipped;
//  - unless pastPrekillHookTimeout(ctx) says otherwise, a pre-kill hook is
//    fired; if the hook has not finished yet, the candidate and the rest of
//    the stack are serialized into prekill_hook_state_ and DEFER is returned;
//  - otherwise tryToLogAndKillCgroup() is called and SUCCESS is returned as
//    soon as it reports true.
//
// @param ctx  context used to look up children, fire hooks and fetch the
//     action context.
// @param next_best_option_stack  candidates still to be considered, best
//     candidate last. Taken by value; the local copy is consumed.
// @param has_tried_to_kill_something_already  true when an earlier call
//     already attempted a kill; suppresses the "nothing was attempted"
//     dumpKillInfo() call at the end.
// @return SUCCESS when a kill went through, DEFER when waiting on a pre-kill
//     hook, FAILED when the stack was exhausted without a successful kill.
//
// OCHECK_EXCEPT is handed a std::runtime_error for the case where
// prekill_hook_state_ is already set on entry.
BaseKillPlugin::KillResult BaseKillPlugin::resumeTryingToKillSomething(
    OomdContext& ctx,
    std::vector<KillCandidate> next_best_option_stack,
    bool has_tried_to_kill_something_already) {
  OCHECK_EXCEPT(
      prekill_hook_state_ == std::nullopt,
      std::runtime_error("Shouldn't be trying to kill anything while pre-kill"
                         " hook is still running"));

  // First unpopulated candidate we skipped; only used for the final
  // dumpKillInfo() call when nothing at all was attempted.
  std::optional<KillCandidate> first_kill_candidate = std::nullopt;

  while (!next_best_option_stack.empty()) {
    // Copy before pop_back(): the element is destroyed by the pop.
    const KillCandidate candidate = next_best_option_stack.back();
    next_best_option_stack.pop_back();

    bool may_recurse =
        recursive_ && !candidate.cgroup_ctx.get().oom_group().value_or(false);
    if (may_recurse) {
      auto children = ctx.addChildrenToCacheAndGet(candidate.cgroup_ctx.get());
      if (children.size() > 0) {
        ologKillTarget(ctx, candidate.cgroup_ctx.get(), *candidate.peers);

        // Shared so that every child candidate can point at the same peer
        // list without copying it.
        auto sorted =
            std::make_shared<std::vector<OomdContext::ConstCgroupContextRef>>(
                rankForKilling(ctx, children));

        OomdContext::dump(*sorted, !debug_);

        // push the lowest ranked sibling onto the next_best_option_stack first,
        // so the highest ranked sibling is on top
        std::reverse(sorted->begin(), sorted->end());
        for (const auto& cgroup_ctx : *sorted) {
          next_best_option_stack.emplace_back(KillCandidate{
              .cgroup_ctx = cgroup_ctx,
              // Children inherit the kill_root of the candidate they were
              // expanded from, so it is carried down the whole recursion.
              .kill_root = candidate.kill_root,
              .peers = sorted});
        }

        continue;
      }
    }

    // Skip trying to kill an empty cgroup, which would unfairly increment the
    // empty cgroup's kill counters and pollute the logs. We get into a
    // situation where we try to kill empty cgroups when a cgroup marked
    // PREFER is not the source of pressure: KillMemoryGrowth will kill the
    // PREFER cgroup first, but that won't fix the problem so it will kill
    // again; on the second time around, it first targets the now-empty PREFER
    // cgroup before moving on to a better victim.
    if (!candidate.cgroup_ctx.get().is_populated().value_or(true)) {
      if (!has_tried_to_kill_something_already && !first_kill_candidate) {
        first_kill_candidate = candidate;
      }
      continue;
    }

    ologKillTarget(ctx, candidate.cgroup_ctx.get(), *candidate.peers);

    if (!pastPrekillHookTimeout(ctx)) {
      auto hook_invocation = ctx.firePrekillHook(candidate.cgroup_ctx.get());
      if (hook_invocation && !(*hook_invocation)->didFinish()) {
        // The hook is still running: stash everything needed to resume later
        // in prekill_hook_state_, in serialized form.
        auto serialize_cgroup_ref = [&](const CgroupContext& cgroup_ctx) {
          // cgroup_ctx.id() may be nullopt, which means the cgroup is deleted
          return SerializedCgroupRef{
              .path = cgroup_ctx.cgroup(), .id = cgroup_ctx.id()};
        };

        // memoize serialize_peer_group by unserialized peer group pointer, so
        // candidates that share one peers vector also share one serialized
        // vector
        std::map<
            const std::vector<OomdContext::ConstCgroupContextRef>*,
            std::shared_ptr<std::vector<SerializedCgroupRef>>>
            memoized_peer_groups;
        auto serialize_peer_group =
            [&](const std::vector<OomdContext::ConstCgroupContextRef>* peers) {
              auto it = memoized_peer_groups.find(peers);
              if (it != memoized_peer_groups.end()) {
                return it->second;
              }
              auto serialized_peers =
                  std::make_shared<std::vector<SerializedCgroupRef>>();
              for (const auto& peer : *peers) {
                serialized_peers->emplace_back(serialize_cgroup_ref(peer));
              }
              memoized_peer_groups[peers] = serialized_peers;
              return serialized_peers;
            };

        auto serialize_kill_candidate = [&](const KillCandidate& kc) {
          return SerializedKillCandidate{
              .target = serialize_cgroup_ref(kc.cgroup_ctx),
              // Serialize the candidate's own kill_root, not the target
              // again, so the root survives the serialization round trip.
              .kill_root = serialize_cgroup_ref(kc.kill_root),
              .peers = serialize_peer_group(kc.peers.get())};
        };

        prekill_hook_state_ = ActivePrekillHook{
            .hook_invocation = std::move(*hook_invocation),
            .intended_victim = serialize_kill_candidate(candidate)};

        // Preserve stack order: bottom of the stack is serialized first.
        for (const KillCandidate& kc : next_best_option_stack) {
          prekill_hook_state_->next_best_option_stack.emplace_back(
              serialize_kill_candidate(kc));
        }

        return KillResult::DEFER;
      }
    }

    has_tried_to_kill_something_already = true;
    if (tryToLogAndKillCgroup(ctx, candidate)) {
      return KillResult::SUCCESS;
    }
  }

  // Stack exhausted. If no kill was ever attempted, still emit one
  // dumpKillInfo() record, using the first skipped (unpopulated) candidate
  // when there is one and placeholder values otherwise.
  // NOTE(review): the literal `false` argument presumably marks the kill as
  // unsuccessful -- confirm against dumpKillInfo()'s declaration.
  if (!has_tried_to_kill_something_already) {
    KillUuid kill_uuid = generateKillUuid();
    auto action_context = ctx.getActionContext();

    if (first_kill_candidate.has_value()) {
      dumpKillInfo(
          first_kill_candidate.value().cgroup_ctx.get().cgroup(),
          first_kill_candidate.value().cgroup_ctx.get(),
          first_kill_candidate.value().kill_root.get(),
          action_context,
          kill_uuid,
          false,
          dry_);
    } else {
      dumpKillInfo(
          CgroupPath{"", ""},
          std::nullopt,
          std::nullopt,
          action_context,
          kill_uuid,
          false,
          dry_);
    }
  }

  return KillResult::FAILED;
}