public void cleanupPids()

in uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java [540:644]


  /**
   * Kills targeted processes that are still listed in ducc cgroups and then removes those
   * cgroups.
   * <p>
   * Every folder under the ducc cgroup location whose name contains
   * {@code <number>.<number>.<number>} is inspected. For each pid listed in the folder's procs
   * file that {@code isTargetForKill} accepts, the matching node process is killed via
   * {@code NodeAgent.SIGKILL}, unless it is a zombie. The method then polls the procs file until
   * no killable target remains. The cgroup is destroyed only when none of the targeted pids in it
   * was a zombie.
   *
   * @param pidsToKill
   *          pids eligible for killing; passed unchanged to {@code isTargetForKill} (exact
   *          matching semantics live in that helper - confirm there)
   * @throws Exception
   *           if the list of processes on the node cannot be obtained. Failures while handling
   *           an individual cgroup are logged and do not stop the scan of remaining cgroups.
   */
  public void cleanupPids(Set<String> pidsToKill) throws Exception {

    Set<NodeProcessInfo> processes = getProcessesOnNode();
    // Match any folder under the ducc cgroup location whose name contains
    // <number>.<number>.<number>
    // This syntax is assigned by ducc to each cgroup
    Pattern p = Pattern.compile("((\\d+)\\.(\\d+)\\.(\\d+))");

    String cgroupLocation = getCGroupLocation(CGDuccMemoryPath);
    String[] files = new File(cgroupLocation).list();
    if (files == null || files.length == 0) {
      return;
    }

    for (String cgroupFolder : files) {
      // only look at ducc's cgroups
      if (!p.matcher(cgroupFolder).find()) {
        continue;
      }
      try {
        // open procs file which may include PIDs if processes are still running
        File f = new File(cgroupLocation + cgroupFolder + CGProcsFile);
        String[] pids = readPids(f);

        // Targeted pids of THIS cgroup that turned out to be zombies. Zombies elsewhere on the
        // node must not influence the decisions made for this cgroup.
        Set<String> zombiePids = new java.util.HashSet<String>();

        if (pids != null && pids.length > 0) {
          agentLogger.info("cleanupOnStartup", null, "Agent found " + pids.length
                  + " cgroup proceses still active. Proceeding to remove running processes");

          for (String pid : pids) {
            if (!isTargetForKill(pidsToKill, pid)) {
              continue;
            }
            NodeProcessInfo proc = lookupProcessByPid(processes, pid);
            if (proc == null) {
              // pid is no longer among the node's processes; nothing to kill
              continue;
            }
            if (proc.isZombie()) {
              // Dont kill a zombie process as it is already dead. Just remember it.
              zombiePids.add(pid);
            } else {
              // kill process hard via -9
              agentLogger.info("cleanupOnStartup", null,
                      ">>>>>> Killing target process " + proc.getPid());
              kill(proc.getUserid(), proc.getPid(), NodeAgent.SIGKILL);
            }
          }
          // it may take some time for the cgroups to update accounting
          awaitTargetPidsGone(f, cgroupFolder, pidsToKill, zombiePids);
        }

        // Don't remove CGroups if there are zombie processes there. Otherwise, attempt
        // to remove the CGroup may hang a thread.
        if (zombiePids.isEmpty()) { // no zombies in the container
          destroyContainer(cgroupFolder, SYSTEM, NodeAgent.SIGTERM);
          agentLogger.info("cleanupOnStartup", null,
                  "--- Agent Removed Empty CGroup:" + cgroupFolder);
        } else {
          agentLogger.info("cleanupOnStartup", null, "CGroup " + cgroupFolder
                  + " Contains Zombie Processing. Not Removing the Container");
        }
      } catch (FileNotFoundException e) {
        // noop. Cgroup may have been removed already
      } catch (Exception e) {
        agentLogger.error("cleanupOnStartup", null, e);
      }
    }
  }

  /**
   * Returns the first process in {@code processes} whose pid equals {@code pid}, or
   * {@code null} when there is none.
   */
  private NodeProcessInfo lookupProcessByPid(Set<NodeProcessInfo> processes, String pid) {
    for (NodeProcessInfo candidate : processes) {
      if (candidate.getPid().equals(pid)) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Polls the given procs file every 200 ms until it lists no targeted pid other than the known
   * zombies. Zombies are excluded because they never leave the file on their own and would keep
   * this loop spinning forever. Returns early, with the interrupt flag restored, if the calling
   * thread is interrupted.
   */
  private void awaitTargetPidsGone(File procsFile, String cgroupFolder, Set<String> pidsToKill,
          Set<String> zombiePids) throws Exception {
    long logCount = 0;
    while (true) {
      String[] remaining = readPids(procsFile);
      boolean found = false;
      // readPids may yield null; treat that the same as an empty procs file
      if (remaining != null) {
        for (String pid : remaining) {
          if (isTargetForKill(pidsToKill, pid) && !zombiePids.contains(pid)) {
            found = true;
            break; // at least one killable process from the target list is still running
          }
        }
      }
      if (!found) {
        return;
      }
      try {
        synchronized (this) {
          // log every ~30 minutes (10000 * 200), where 200 is a wait time in ms between
          // tries
          if (logCount % 10000 == 0) {
            agentLogger.info("cleanupOnStartup", null, "--- CGroup:" + cgroupFolder
                    + " procs file still showing processes running. Wait until CGroups updates acccounting");
          }
          logCount++;
          wait(200);
        }
      } catch (InterruptedException ee) {
        // keep the interrupt visible to callers instead of silently dropping it
        Thread.currentThread().interrupt();
        return;
      }
    }
  }