in samoa-api/src/main/java/org/apache/samoa/evaluation/measures/CMM.java [278:390]
/**
 * Computes the per-point error and publishes the aggregated CMM measures.
 *
 * <p>
 * For every point the largest applicable error is selected:
 * <ul>
 * <li>a non-noise point that lies in no found cluster gets {@code missedError};</li>
 * <li>a non-noise point that lies in a found cluster whose {@code matchMap} entry differs from the point's
 * work class gets {@code misplacedError};</li>
 * <li>a noise point that lies in a found cluster gets {@code noiseError}.</li>
 * </ul>
 * A point counts as lying in found cluster {@code c} when {@code pointInclusionProbFC[p][c]} reaches
 * {@code pointInclusionProbThreshold}.
 *
 * <p>
 * The error and the number of covering clusters are stored on the point as the measure values "CMM" and
 * "Redundancy". The aggregated values "CMM", "CMM Missed", "CMM Misplaced", "CMM Noise" and "CMM Basic" are
 * reported through {@code addValue}; each of them falls back to 1 when its normalisation term is zero, which
 * includes the case of an empty point set for "CMM Basic".
 */
private void calculateError() {
  int totalErrorCount = 0;

  double errorNoise = 0;
  double errorNoiseMax = 0;

  double errorMissed = 0;
  double errorMissedMax = 0;

  double errorMisplaced = 0;
  double errorMisplacedMax = 0;

  double totalError = 0.0;
  double totalErrorMax = 0.0;

  // Single pass over all points: pick the error value for each point and accumulate the totals.
  for (int p = 0; p < numPoints; p++) {
    CMMPoint cmdp = gtAnalysis.getPoint(p);
    double weight = cmdp.weight();
    boolean noise = cmdp.isNoise();

    // Upper bound of the error this point can contribute (original note: connectivity is always 1 for noise).
    double maxPointError = cmdp.connectivity * weight;
    if (noise) {
      errorNoiseMax += maxPointError;
    } else {
      errorMissedMax += maxPointError;
      errorMisplacedMax += maxPointError;
    }
    // The individual errors are quality weighted between 0 and 1, so the maximum is summed separately.
    totalErrorMax += maxPointError;

    double err = 0;
    int coverage = 0;

    // Check every found cluster and keep the largest error among the clusters containing the point.
    for (int c = 0; c < numFClusters; c++) {
      if (pointInclusionProbFC[p][c] < pointInclusionProbThreshold) {
        continue; // not contained in cluster c
      }
      coverage++;

      if (noise) {
        // NOISE: a noise point was put into a cluster
        double errvalue = noiseError(cmdp, c);
        if (errvalue > err) {
          err = errvalue;
        }
      } else if (matchMap[c] != cmdp.workclass()) {
        // MISPLACED: the cluster is matched to a different class than the point's
        double errvalue = misplacedError(cmdp, c);
        if (errvalue > err) {
          err = errvalue;
        }
      }
      // otherwise the point is placed correctly and contributes no error for this cluster
    }

    if (coverage == 0) {
      // MISSED: a non-noise point in no cluster; uncovered noise is the desired outcome and stays at 0
      if (!noise) {
        err = missedError(cmdp, true);
        errorMissed += weight * err;
      }
    } else if (noise) {
      errorNoise += err * weight;
    } else {
      errorMisplaced += err * weight;
    }

    totalError += err * weight;
    if (err != 0) {
      totalErrorCount++;
    }

    cmdp.p.setMeasureValue("CMM", err);
    cmdp.p.setMeasureValue("Redundancy", coverage);
  }

  addValue("CMM", (totalErrorMax != 0) ? 1 - totalError / totalErrorMax : 1);
  addValue("CMM Missed", (errorMissedMax != 0) ? 1 - errorMissed / errorMissedMax : 1);
  addValue("CMM Misplaced", (errorMisplacedMax != 0) ? 1 - errorMisplaced / errorMisplacedMax : 1);
  addValue("CMM Noise", (errorNoiseMax != 0) ? 1 - errorNoise / errorNoiseMax : 1);
  // Guard the division like the measures above: with no points 0.0 / 0.0 would report NaN.
  addValue("CMM Basic", (numPoints != 0) ? 1 - ((double) totalErrorCount / (double) numPoints) : 1);

  if (debug) {
    System.out.println("-------------");
  }
}