in samoa-api/src/main/java/org/apache/samoa/evaluation/measures/StatisticalCollection.java [118:184]
/**
 * Computes a simplified C-index from the pairwise distances of points that share a cluster.
 *
 * <p>A point is treated as a member of a cluster when that cluster's inclusion probability for
 * the point is greater than 0.8, so a point may belong to several clusters or to none. For each
 * cluster the distance between every unordered pair of its member points is measured. The result
 * is {@code (mean - min) / (max - min)}, where mean, min and max are taken over all of those
 * within-cluster pair distances.
 *
 * @param clustering the clusters to evaluate
 * @param points the data points to assign to the clusters
 * @return the index value; 0 when no within-cluster pair exists, or when all pair distances are
 *         equal (the ratio would otherwise be the undefined 0/0)
 */
public double cindex(Clustering clustering, ArrayList<DataPoint> points) {
  int numClusters = clustering.size();

  // Per-cluster extremes; infinities are used as sentinels so that any real distance replaces
  // them. (Double.MIN_VALUE is the smallest positive double, not the most negative one, so it
  // cannot serve as a lower sentinel.)
  double[] minWithinClusters = new double[numClusters];
  double[] maxWithinClusters = new double[numClusters];
  Arrays.fill(minWithinClusters, Double.POSITIVE_INFINITY);
  Arrays.fill(maxWithinClusters, Double.NEGATIVE_INFINITY);

  // For each cluster, the indices (into points) of its members.
  ArrayList<ArrayList<Integer>> pointsInClusters = new ArrayList<ArrayList<Integer>>(numClusters);
  for (int c = 0; c < numClusters; c++) {
    pointsInClusters.add(new ArrayList<Integer>());
  }
  for (int p = 0; p < points.size(); p++) {
    for (int c = 0; c < numClusters; c++) {
      if (clustering.get(c).getInclusionProbability(points.get(p)) > 0.8) {
        pointsInClusters.get(c).add(p);
      }
    }
  }

  // Accumulate within-cluster pair distances and track each cluster's min and max.
  double withinClustersDistance = 0;
  int numDistancesWithin = 0;
  for (int c = 0; c < numClusters; c++) {
    ArrayList<Integer> pointsInC = pointsInClusters.get(c);
    for (int p = 0; p < pointsInC.size(); p++) {
      int firstIndex = pointsInC.get(p);
      DataPoint point = points.get(firstIndex);
      // Start at p + 1 so each unordered pair is visited exactly once.
      for (int p1 = p + 1; p1 < pointsInC.size(); p1++) {
        int secondIndex = pointsInC.get(p1);
        DataPoint point1 = points.get(secondIndex);
        double dist = point.getDistance(point1);
        numDistancesWithin++;
        withinClustersDistance += dist;
        if (minWithinClusters[c] > dist) {
          minWithinClusters[c] = dist;
        }
        if (maxWithinClusters[c] < dist) {
          maxWithinClusters[c] = dist;
        }
      }
    }
  }

  // Reduce the per-cluster extremes to global ones; clusters without pairs keep their
  // sentinels and therefore never win either comparison.
  double minWithin = Double.POSITIVE_INFINITY;
  double maxWithin = Double.NEGATIVE_INFINITY;
  for (int c = 0; c < numClusters; c++) {
    if (minWithinClusters[c] < minWithin) {
      minWithin = minWithinClusters[c];
    }
    if (maxWithinClusters[c] > maxWithin) {
      maxWithin = maxWithinClusters[c];
    }
  }

  double cindex = 0;
  if (numDistancesWithin != 0) {
    double range = maxWithin - minWithin;
    // When all distances are equal the range is 0 and the ratio is 0/0; keep the default 0
    // instead of returning NaN.
    if (range > 0) {
      double meanWithinClustersDistance = withinClustersDistance / numDistancesWithin;
      cindex = (meanWithinClustersDistance - minWithin) / range;
    }
  }

  if (debug) {
    System.out.println("Min:" + Arrays.toString(minWithinClusters));
    System.out.println("Max:" + Arrays.toString(maxWithinClusters));
    System.out.println("totalWithin:" + numDistancesWithin);
  }
  return cindex;
}