in samoa-api/src/main/java/org/apache/samoa/evaluation/measures/EntropyCollection.java [47:171]
/**
 * Computes entropy-based quality measures for a found clustering and records them through
 * {@code addValue} under the names "FC cross entropy", "GT cross entropy", "Homogeneity",
 * "Completeness", "V-Measure" and "VarInformation".
 *
 * <p>All counts come from a {@code MembershipMatrix} built from {@code fclustering} and
 * {@code points}. Logarithms are base 10. Homogeneity and completeness follow the usual
 * definitions:
 *
 * <pre>
 *   homogeneity  = 1 - H(class | cluster) / H(class)     (1 when H(class) is 0)
 *   completeness = 1 - H(cluster | class) / H(cluster)   (1 when H(cluster) is 0)
 *   V-Measure    = (1 + beta) * h * c / (beta * h + c)   (0 when the denominator is 0)
 * </pre>
 *
 * <p>A homogeneity or V-Measure outside {@code [0, 1]} is recorded as {@code -1}.
 *
 * @param fclustering the found clustering being evaluated
 * @param hClustering not read by this method
 * @param points the data points the membership matrix is built from
 * @throws Exception declared by the signature; this method contains no explicit {@code throw}
 */
public void evaluateClustering(Clustering fclustering, Clustering hClustering, ArrayList<DataPoint> points)
    throws Exception {
  MembershipMatrix mm = new MembershipMatrix(fclustering, points);
  int numClasses = mm.getNumClasses();
  // One cluster slot more than the found clustering has clusters.
  // NOTE(review): presumably the extra slot holds points assigned to no cluster - confirm
  // against MembershipMatrix.
  int numCluster = fclustering.size() + 1;
  int n = mm.getTotalEntries();

  // Sum of w * log10(w) over the cluster shares, i.e. -H(cluster). Kept unnormalised so that
  // the completeness ratio below does not depend on the log10(numCluster) normaliser.
  double clusterLogSum = 0;
  double FCentropy = 0;
  if (numCluster > 1) {
    for (int fc = 0; fc < numCluster; fc++) {
      double weight = mm.getClusterSum(fc) / (double) n;
      if (weight > 0) {
        clusterLogSum += weight * Math.log10(weight);
      }
    }
    FCentropy = clusterLogSum / (-1 * Math.log10(numCluster));
  }
  logger.debug("FC entropy: {}", FCentropy);

  // Sum of w * log10(w) over the class shares, i.e. -H(class).
  double classLogSum = 0;
  double GTentropy = 0;
  if (numClasses > 1) {
    for (int hc = 0; hc < numClasses; hc++) {
      double weight = mm.getClassSum(hc) / (double) n;
      if (weight > 0) {
        classLogSum += weight * Math.log10(weight);
      }
    }
    GTentropy = classLogSum / (-1 * Math.log10(numClasses));
  }
  logger.debug("GT entropy: {}", GTentropy);

  // cluster based entropy: for every non-empty cluster, the entropy term of the class shares
  // inside it, weighted by the cluster's share of all entries. The sum is -H(class | cluster).
  double classGivenClusterLogSum = 0;
  for (int fc = 0; fc < numCluster; fc++) {
    int clusterWeight = mm.getClusterSum(fc);
    if (clusterWeight > 0) {
      double e = 0;
      for (int hc = 0; hc < numClasses; hc++) {
        double p = mm.getClusterClassWeight(fc, hc) / (double) clusterWeight;
        if (p != 0) {
          e += p * Math.log10(p);
        }
      }
      classGivenClusterLogSum += ((clusterWeight / (double) n) * e);
    }
  }
  double FCcrossEntropy = classGivenClusterLogSum;
  if (numCluster > 1) {
    FCcrossEntropy /= -1 * Math.log10(numCluster);
  }
  addValue("FC cross entropy", 1 - FCcrossEntropy);
  logger.debug("FC cross entropy: {}", 1 - FCcrossEntropy);

  // class based entropy: the mirror image of the loop above; the sum is -H(cluster | class).
  double clusterGivenClassLogSum = 0;
  for (int hc = 0; hc < numClasses; hc++) {
    int classWeight = mm.getClassSum(hc);
    if (classWeight > 0) {
      double e = 0;
      for (int fc = 0; fc < numCluster; fc++) {
        double p = mm.getClusterClassWeight(fc, hc) / (double) classWeight;
        if (p != 0) {
          e += p * Math.log10(p);
        }
      }
      // Accumulated inside the guard (as in the cluster loop) so that an empty class never
      // contributes 0 / 0.0 = NaN when n is 0.
      clusterGivenClassLogSum += ((classWeight / (double) n) * e);
    }
  }
  double GTcrossEntropy = clusterGivenClassLogSum;
  if (numClasses > 1) {
    GTcrossEntropy /= -1 * Math.log10(numClasses);
  }
  addValue("GT cross entropy", 1 - GTcrossEntropy);
  logger.debug("GT cross entropy: {}", 1 - GTcrossEntropy);

  // Homogeneity compares the class uncertainty left after knowing the cluster with the class
  // uncertainty itself. Both operands are the raw (negated) sums, so the signs cancel.
  double homogeneity;
  if (classLogSum == 0) {
    homogeneity = 1;
  } else {
    homogeneity = 1 - classGivenClusterLogSum / classLogSum;
  }
  // Values outside [0, 1] are reported with the error value -1.
  if (homogeneity > 1 || homogeneity < 0) {
    addValue("Homogeneity", -1);
  } else {
    addValue("Homogeneity", homogeneity);
  }

  // Completeness is the symmetric quantity: cluster uncertainty left after knowing the class,
  // relative to the cluster uncertainty.
  double completeness;
  if (clusterLogSum == 0) {
    completeness = 1;
  } else {
    completeness = 1 - clusterGivenClassLogSum / clusterLogSum;
  }
  addValue("Completeness", completeness);

  double beta = 1;
  double vDenominator = beta * homogeneity + completeness;
  // A zero denominator would give 0 / 0 = NaN; define the V-Measure as 0 in that case.
  double vmeasure = (vDenominator == 0) ? 0 : (1 + beta) * homogeneity * completeness / vDenominator;
  if (vmeasure > 1 || vmeasure < 0 || homogeneity < 0) {
    addValue("V-Measure", -1);
  } else {
    addValue("V-Measure", vmeasure);
  }

  // Mutual information between cluster and class membership, summed over the non-empty cells.
  double mutual = 0;
  for (int i = 0; i < numCluster; i++) {
    for (int j = 0; j < numClasses; j++) {
      if (mm.getClusterClassWeight(i, j) == 0) {
        continue;
      }
      double m = Math.log10(mm.getClusterClassWeight(i, j) / (double) mm.getClusterSum(i)
          / (double) mm.getClassSum(j) * (double) n);
      m *= mm.getClusterClassWeight(i, j) / (double) n;
      logger.debug("({}/{}): {}", i, j, m);
      mutual += m;
    }
  }
  // NOTE(review): mutual is normalised by log10(numClasses) while FCentropy is normalised by
  // log10(numCluster); confirm that mixing the two in the ratio below is intended.
  if (numClasses > 1) {
    mutual /= Math.log10(numClasses);
  }

  double varInfo = 1;
  if (FCentropy + GTentropy > 0) {
    varInfo = 2 * mutual / (FCentropy + GTentropy);
  }
  logger.debug("mutual: {} / VI: {}", mutual, varInfo);
  addValue("VarInformation", varInfo);
}