in uima-ducc-agent/src/main/java/org/apache/uima/ducc/agent/NodeAgent.java [365:569]
public NodeAgent(NodeIdentity nodeIdentity, Launcher launcher, CamelContext context,
AgentConfiguration factory) throws Exception {
super(COMPONENT_NAME, context);
Utils.findDuccHome(); // add DUCC_HOME to System.properties
// Running a real agent
agentVirtual = System.getProperty("ducc.agent.virtual") == null ? false : true;
this.nodeIdentity = nodeIdentity;
this.launcher = launcher;
this.configurationFactory = factory;
this.commonProcessDispatcher = factory.getCommonProcessDispatcher(context);
this.ORDispatcher = factory.getORDispatcher(context);
// fetch Page Size from the OS and cache it
pageSize = getOSPageSize();
numProcessors = getNodeProcessors();
logger.info("NodeAgent", null, "OS Page Size:" + pageSize);
cpuClockRate = getOSClockRate();
logger.info("NodeAgent", null, "OS Clock Rate:" + cpuClockRate);
if (System.getProperty("ducc.rm.share.quantum") != null
&& System.getProperty("ducc.rm.share.quantum").trim().length() > 0) {
shareQuantum = Integer.parseInt(System.getProperty("ducc.rm.share.quantum").trim());
}
/* Enable CGROUPS */
String cgroups;
String cgUtilsPath = null;
boolean excludeNodeFromCGroups = false;
if (!isVirtual()
&& (cgroups = System.getProperty("ducc.agent.launcher.cgroups.enable")) != null) {
if (cgroups.equalsIgnoreCase("true")) {
logger.info("nodeAgent", null, "ducc.properties [ducc.agent.launcher.cgroups.enable=true]");
// Load exclusion file. Some nodes may be excluded from cgroups
String exclusionFile;
// get the name of the exclusion file from ducc.properties
if ((exclusionFile = System.getProperty("ducc.agent.exclusion.file")) != null) {
logger.info("nodeAgent", null,
"Ducc configured with cgroup node exclusion file - ducc.properties [ducc.agent.exclusion.file="
+ exclusionFile + "]");
// Parse node exclusion file and determine if cgroups and AP
// deployment
// is allowed on this node
NodeExclusionParser exclusionParser = new NodeExclusionParser();
exclusionParser.parse(exclusionFile);
excludeNodeFromCGroups = exclusionParser.cgroupsExcluded();
excludeAPs = exclusionParser.apExcluded();
if (excludeNodeFromCGroups) {
logger.info("nodeAgent", null,
"------- Node Explicitly Excluded From Using CGroups. Check File:"
+ exclusionFile);
cgroupFailureReason = "------- Node Explicitly Excluded From Using CGroups. Check File:"
+ exclusionFile;
}
System.out.println(
"excludeNodeFromCGroups=" + excludeNodeFromCGroups + " excludeAPs=" + excludeAPs);
} else {
logger.info("nodeAgent", null, "Agent node *not* excluded from using cgroups");
}
// node not in the exclusion list for cgroups
if (!excludeNodeFromCGroups) {
// fetch a list of paths the agent will search to find cgroups utils
// like cgexec. The default location is /usr/bin
logger.info("nodeAgent", null,
"Testing cgroups to check if runtime utilities (cgexec) exist in expected locations in the filesystem");
String cgroupsUtilsDirs = System.getProperty("ducc.agent.launcher.cgroups.utils.dir");
if (cgroupsUtilsDirs == null) {
cgUtilsPath = "/usr/bin"; // default
} else {
String[] paths = cgroupsUtilsDirs.split(",");
for (String path : paths) {
File file = new File(path.trim() + "/cgexec");
if (file.exists()) {
cgUtilsPath = path;
break;
}
}
}
// scan /proc/mounts for base cgroup dir
String cgroupsBaseDir = fetchCgroupsBaseDir("/proc/mounts");
if (cgUtilsPath == null) {
useCgroups = false;
logger.info("nodeAgent", null,
"------- CGroups Disabled - Unable to Find Cgroups Utils Directory. Add/Modify ducc.agent.launcher.cgroups.utils.dir property in ducc.properties");
} else if (cgroupsBaseDir == null || cgroupsBaseDir.trim().length() == 0) {
useCgroups = false;
logger.info("nodeAgent", null,
"------- CGroups Disabled - Unable to Find Cgroups Root Directory in /proc/mounts");
} else {
logger.info("nodeAgent", null, "Agent found cgroups runtime in " + cgUtilsPath
+ " cgroups base dir=" + cgroupsBaseDir);
// if cpuacct is configured in cgroups, the subsystems list will be updated
String cgroupsSubsystems = "memory,cpu";
long maxTimeToWaitForProcessToStop = 60000; // default 1 minute
if (configurationFactory.processStopTimeout != null) {
maxTimeToWaitForProcessToStop = Long.valueOf(configurationFactory.processStopTimeout);
}
cgroupsManager = new CGroupsManager(cgUtilsPath, cgroupsBaseDir, cgroupsSubsystems,
logger, maxTimeToWaitForProcessToStop);
cgroupsManager.configure(this);
// check if cgroups base directory exists in the filesystem
// which means that cgroups
// and cgroups convenience package are installed and the
// daemon is up and running.
if (cgroupsManager.cgroupExists(cgroupsBaseDir)) {
logger.info("nodeAgent", null,
"Agent found cgroup base directory in " + cgroupsBaseDir);
try {
String containerId = "test";
// validate cgroups by creating a dummy cgroup. The code checks if cgroup actually
// got created by
// verifying existence of test cgroup file. The second step in verification is to
// check if
// CPU control is working. Configured in cgconfig.conf, the CPU control allows for
// setting
// cpu.shares. The code will attempt to set the shares and subsequently tries to
// read the
// value from cpu.shares file to make sure the values match. Any exception in the
// above steps
// will cause cgroups to be disabled.
//
cgroupsManager.validator(cgroupsBaseDir, containerId,
System.getProperty("user.name"), false).cgcreate().cgset(100); // write
// cpu.shares=100
// and
// validate
// cleanup dummy cgroup
cgroupsManager.destroyContainer(containerId, System.getProperty("user.name"),
SIGKILL);
useCgroups = true;
} catch (CGroupsManager.CGroupsException ee) {
logger.info("nodeAgent", null, ee);
cgroupFailureReason = ee.getMessage();
useCgroups = false;
}
if (useCgroups) {
try {
// remove stale CGroups
cgroupsManager.cleanup();
} catch (Exception e) {
logger.error("nodeAgent", null, e);
useCgroups = false;
logger.info("nodeAgent", null,
"Agent cgroup cleanup failed on this machine base directory in "
+ cgroupsBaseDir
+ ". Check if cgroups is installed on this node, Agent has correct permissions (consistent with cgconfig.conf), and the cgroup daemon is running");
cgroupFailureReason = "------- CGroups Not Working on this Machine";
}
} else {
logger.info("nodeAgent", null,
"Agent cgroup test failed on this machine base directory in "
+ cgroupsBaseDir
+ ". Check if cgroups is installed on this node, Agent has correct permissions (consistent with cgconfig.conf), and the cgroup daemon is running");
cgroupFailureReason = "------- CGroups Not Working on this Machine";
}
} else {
logger.info("nodeAgent", null, "Agent failed to find cgroup base directory in "
+ cgroupsBaseDir
+ ". Check if cgroups is installed on this node and the cgroup daemon is running");
// logger.info("nodeAgent", null, "------- CGroups Not Installed on this Machine");
cgroupFailureReason = "------- CGroups Not Installed on this Machine";
}
}
}
}
} else {
logger.info("nodeAgent", null, "------- CGroups Not Enabled on this Machine");
cgroupFailureReason = "------- CGroups Not Enabled on this Machine - check ducc.properties: ducc.agent.launcher.cgroups.enable ";
}
// begin publishing node metrics
factory.startNodeMetrics(this);
logger.info("nodeAgent", null,
"CGroup Support=" + useCgroups + " excludeNodeFromCGroups=" + excludeNodeFromCGroups
+ " excludeAPs=" + excludeAPs + " CGroups utils Dir:" + cgUtilsPath);
String useSpawn = System.getProperty("ducc.agent.launcher.use.ducc_spawn");
if (useSpawn != null && useSpawn.toLowerCase().equals("true")) {
runWithDuccLing = true;
String c_launcher_path = Utils.resolvePlaceholderIfExists(
System.getProperty("ducc.agent.launcher.ducc_spawn_path"), System.getProperties());
try {
File duccLing = new File(c_launcher_path);
if (duccLing.exists()) {
duccLingExists = true;
}
} catch (Exception e) {
logger.info("nodeAgent", null,
"------- Agent failed while checking for existence of ducc_ling", e);
}
}
}