in uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/launcher/CGroupsManager.java [540:644]
/**
 * Kills the given processes if they are still listed in one of DUCC's cgroups, waits for
 * them to drop out of the cgroup's procs file, and then removes the cgroup.
 *
 * <p>Only folders whose name contains {@code <number>.<number>.<number>} are examined.
 * For each such cgroup:
 * <ol>
 * <li>every pid in the procs file that {@code isTargetForKill} accepts is looked up in the
 * snapshot of node processes taken at the start of this call; a live process is sent
 * {@code SIGKILL}, a zombie is only recorded;</li>
 * <li>the procs file is polled every 200 ms until no live (non-zombie) target pid is
 * listed, or until the calling thread is interrupted;</li>
 * <li>the cgroup is destroyed unless a zombie target was found in it.</li>
 * </ol>
 *
 * <p>Failures while handling a single cgroup are logged and do not stop processing of the
 * remaining cgroups.
 *
 * @param pidsToKill
 *          pids that should be terminated; passed through to {@code isTargetForKill}
 * @throws Exception
 *           if the node process list or the cgroup base location cannot be obtained
 *           (errors raised while handling an individual cgroup are caught and logged)
 */
public void cleanupPids(Set<String> pidsToKill) throws Exception {
  Set<NodeProcessInfo> processes = getProcessesOnNode();
  // Match any cgroup folder whose name has the syntax <number>.<number>.<number>.
  // This syntax is assigned by ducc to each cgroup.
  Pattern p = Pattern.compile("((\\d+)\\.(\\d+)\\.(\\d+))");
  File cgroupsFolder = new File(getCGroupLocation(CGDuccMemoryPath));
  String[] files = cgroupsFolder.list();
  if (files == null || files.length == 0) {
    return;
  }
  for (String cgroupFolder : files) {
    Matcher m = p.matcher(cgroupFolder);
    // only look at ducc's cgroups
    if (!m.find()) {
      continue;
    }
    try {
      // open the procs file, which lists PIDs if processes are still running
      File f = new File(getCGroupLocation(CGDuccMemoryPath) + cgroupFolder + CGProcsFile);
      String[] pids = readPids(f);
      // Target pids in THIS cgroup that are zombies. Tracking the pids (rather than a
      // node-wide counter) keeps unrelated zombies from blocking cgroup removal and lets
      // the wait loop below ignore processes that will never leave the procs file.
      Set<String> zombiePids = new java.util.HashSet<String>();
      if (pids != null && pids.length > 0) {
        agentLogger.info("cleanupOnStartup", null, "Agent found " + pids.length
                + " cgroup proceses still active. Proceeding to remove running processes");
        for (String pid : pids) {
          if (!isTargetForKill(pidsToKill, pid)) {
            continue;
          }
          for (NodeProcessInfo proc : processes) {
            // Match the pid first so only the process we are looking for is classified.
            if (!proc.getPid().equals(pid)) {
              continue;
            }
            if (proc.isZombie()) {
              // Don't kill a zombie; it is already dead. Just remember it.
              zombiePids.add(pid);
            } else {
              // kill process hard via -9
              agentLogger.info("cleanupOnStartup", null,
                      ">>>>>> Killing target process " + proc.getPid());
              kill(proc.getUserid(), proc.getPid(), NodeAgent.SIGKILL);
            }
          }
        }
        long logCount = 0;
        // It may take some time for cgroups to update accounting. Cycle until no live
        // target pid remains in the procs file of this cgroup.
        while (true) {
          pids = readPids(f);
          // Check for "nothing listed" before iterating so a null result cannot NPE.
          if (pids == null || pids.length == 0) {
            break;
          }
          boolean found = false;
          for (String pid : pids) {
            // Zombies never disappear from the procs file; waiting on them would loop
            // forever, so they do not count as "still running".
            if (isTargetForKill(pidsToKill, pid) && !zombiePids.contains(pid)) {
              found = true;
              break; // at least one live process from the target list is still running
            }
          }
          if (!found) {
            break;
          }
          try {
            synchronized (this) {
              // log every ~30 minutes (10000 * 200), where 200 is a wait time in ms
              // between tries
              if (logCount % 10000 == 0) {
                agentLogger.info("cleanupOnStartup", null, "--- CGroup:" + cgroupFolder
                        + " procs file still showing processes running. Wait until CGroups updates acccounting");
              }
              logCount++;
              wait(200);
            }
          } catch (InterruptedException ee) {
            // Stop waiting, but keep the interrupt visible to the caller.
            Thread.currentThread().interrupt();
            break;
          }
        }
      }
      // Don't remove the cgroup if there are zombie processes in it. Otherwise, the
      // attempt to remove the cgroup may hang a thread.
      if (zombiePids.isEmpty()) { // no zombies in the container
        destroyContainer(cgroupFolder, SYSTEM, NodeAgent.SIGTERM);
        agentLogger.info("cleanupOnStartup", null,
                "--- Agent Removed Empty CGroup:" + cgroupFolder);
      } else {
        agentLogger.info("cleanupOnStartup", null, "CGroup " + cgroupFolder
                + " Contains Zombie Processing. Not Removing the Container");
      }
    } catch (FileNotFoundException e) {
      // noop. Cgroup may have been removed already
    } catch (Exception e) {
      agentLogger.error("cleanupOnStartup", null, e);
    }
  }
}