in samoa-api/src/main/java/org/apache/samoa/learners/classifiers/ensemble/BoostMAProcessor.java [169:241]
/**
 * Finishes a split attempt on a leaf using the best and second-best split suggestions stored on the node.
 *
 * <p>The two suggestions are ranked against a "null split" (a candidate with no split test, scored on the
 * node's current class distribution). The leaf is replaced by a {@link SplitNode} only when the top-ranked
 * candidate beats the runner-up by more than the Hoeffding bound (or the bound is below the tie threshold)
 * and the top-ranked candidate carries an actual split test. Whatever the outcome, the node is told that
 * splitting has ended and its "weight seen at last split evaluation" is refreshed.
 *
 * @param bvhtActiveLearningNode the leaf under evaluation; it is cast to {@link BoostVHTActiveLearningNode}
 * @param foundNode supplies the leaf's parent split node (null when the leaf is the tree root) and the
 *        branch index of the leaf inside that parent
 */
public void continueAttemptToSplit(ActiveLearningNode bvhtActiveLearningNode, FoundNode foundNode) {
  BoostVHTActiveLearningNode leaf = (BoostVHTActiveLearningNode) bvhtActiveLearningNode;
  AttributeSplitSuggestion winner = leaf.getDistributedBestSuggestion();
  AttributeSplitSuggestion runnerUp = leaf.getDistributedSecondBestSuggestion();

  // Candidate representing "keep the leaf as it is": no split test, merit computed on the
  // current class distribution treated as its own single post-split partition.
  double[] classDistBeforeSplit = leaf.getObservedClassDistribution();
  AttributeSplitSuggestion noSplit = new AttributeSplitSuggestion(null, new double[0][],
      this.splitCriterion.getMeritOfSplit(classDistBeforeSplit, new double[][] { classDistBeforeSplit }));

  // Slot the null split into the ranking; after this step `winner` is never null.
  if (winner == null || noSplit.compareTo(winner) > 0) {
    runnerUp = winner;
    winner = noSplit;
  } else if (runnerUp == null || noSplit.compareTo(runnerUp) > 0) {
    runnerUp = noSplit;
  }

  // With no runner-up there is nothing to compare against, so the decision is immediately positive;
  // otherwise the merit gap must exceed the Hoeffding bound, or the bound must be under the tie threshold.
  boolean splitNow = (runnerUp == null);
  if (!splitNow) {
    double meritRange = this.splitCriterion.getRangeOfMerit(leaf.getObservedClassDistribution());
    double bound = computeHoeffdingBound(meritRange, this.splitConfidence, leaf.getWeightSeen());
    double meritGap = winner.merit - runnerUp.merit;
    splitNow = (meritGap > bound) || (bound < tieThreshold);
    // TODO: add poor attributes removal
  }

  SplitNode parentNode = foundNode.getParent();
  int branchInParent = foundNode.getParentBranch();

  // A winning null split (splitTest == null) leaves the tree untouched.
  // TODO: decide what should happen in that case -> deactivate the node?
  if (splitNow && winner.splitTest != null) {
    SplitNode replacement = new SplitNode(winner.splitTest, leaf.getObservedClassDistribution());
    int branchCount = winner.numSplits();
    for (int branch = 0; branch < branchCount; branch++) {
      Node child = newLearningNode(winner.resultingClassDistributionFromSplit(branch), this.parallelismHint);
      replacement.setChild(branch, child);
    }

    // One active leaf turns into a decision node and gains `branchCount` new active leaves.
    this.activeLeafNodeCount += branchCount - 1;
    this.decisionNodeCount++;

    // Hook the new split node into the tree in place of the old leaf.
    if (parentNode != null) {
      parentNode.setChild(branchInParent, replacement);
    } else {
      this.treeRoot = replacement;
    }

    // When buffered instances are kept, feed them back through training now that the split is in place.
    boolean replayBuffer = splittingOption == SplittingOption.KEEP && this.maxBufferSize > 0;
    if (replayBuffer) {
      Queue<Instance> pending = leaf.getBuffer();
      while (!pending.isEmpty()) {
        Instance buffered = pending.poll();
        this.trainOnInstanceImpl(buffered);
      }
    }
    // TODO: add check on the model's memory size
  }

  // Housekeeping: mark the attempt as finished and remember the weight seen at this evaluation.
  leaf.endSplitting();
  leaf.setWeightSeenAtLastSplitEvaluation(leaf.getWeightSeen());
}