public void evaluateClustering()

in samoa-api/src/main/java/org/apache/samoa/evaluation/measures/EntropyCollection.java [47:171]


  /**
   * Computes entropy-based measures for {@code fclustering} over {@code points} and publishes
   * them through {@code addValue}: "FC cross entropy", "GT cross entropy", "Homogeneity",
   * "Completeness", "V-Measure" and "VarInformation".
   *
   * <p>All counts are read from a {@link MembershipMatrix} built from {@code fclustering} and
   * {@code points}. Homogeneity and V-Measure are published as {@code -1} when the computed
   * value falls outside {@code [0, 1]}.
   *
   * @param fclustering the clustering to evaluate
   * @param hClustering not read by this method
   * @param points the points used to build the cluster/class membership matrix
   * @throws Exception declared by the signature; presumably propagated from the membership
   *           matrix construction (TODO confirm)
   */
  public void evaluateClustering(Clustering fclustering, Clustering hClustering, ArrayList<DataPoint> points)
      throws Exception {

    MembershipMatrix mm = new MembershipMatrix(fclustering, points);
    int numClasses = mm.getNumClasses();
    // One more slot than the clustering holds; presumably the extra row of the membership
    // matrix collects points not covered by any cluster (TODO confirm against MembershipMatrix).
    int numCluster = fclustering.size() + 1;
    int n = mm.getTotalEntries();

    // Entropy of the cluster-size distribution, normalised by log10(numCluster).
    double FCentropy = 0;
    if (numCluster > 1) {
      for (int fc = 0; fc < numCluster; fc++) {
        double weight = mm.getClusterSum(fc) / (double) n;
        // The comparison is also false for NaN (n == 0), so empty input contributes nothing.
        if (weight > 0) {
          FCentropy += weight * Math.log10(weight);
        }
      }
      FCentropy /= (-1 * Math.log10(numCluster));
    }

    logger.debug("FC entropy: {}", FCentropy);

    // Entropy of the class-size distribution, normalised by log10(numClasses).
    double GTentropy = 0;
    if (numClasses > 1) {
      for (int hc = 0; hc < numClasses; hc++) {
        double weight = mm.getClassSum(hc) / (double) n;
        if (weight > 0) {
          GTentropy += weight * Math.log10(weight);
        }
      }
      GTentropy /= (-1 * Math.log10(numClasses));
    }

    logger.debug("GT entropy: {}", GTentropy);

    // cluster based entropy: class distribution inside each cluster, weighted by cluster size
    double FCcrossEntropy = 0;

    for (int fc = 0; fc < numCluster; fc++) {
      int clusterWeight = mm.getClusterSum(fc);
      if (clusterWeight > 0) {
        double e = 0;
        for (int hc = 0; hc < numClasses; hc++) {
          double p = mm.getClusterClassWeight(fc, hc) / (double) clusterWeight;
          if (p != 0) {
            e += p * Math.log10(p);
          }
        }
        FCcrossEntropy += ((clusterWeight / (double) n) * e);
      }
    }
    if (numCluster > 1) {
      FCcrossEntropy /= -1 * Math.log10(numCluster);
    }

    addValue("FC cross entropy", 1 - FCcrossEntropy);
    logger.debug("FC cross entropy: {}", 1 - FCcrossEntropy);

    // class based entropy: cluster distribution inside each class, weighted by class size
    double GTcrossEntropy = 0;
    for (int hc = 0; hc < numClasses; hc++) {
      int classWeight = mm.getClassSum(hc);
      if (classWeight > 0) {
        double e = 0;
        for (int fc = 0; fc < numCluster; fc++) {
          double p = mm.getClusterClassWeight(fc, hc) / (double) classWeight;
          if (p != 0) {
            e += p * Math.log10(p);
          }
        }
        // Accumulate only for non-empty classes, mirroring the cluster loop above. Outside
        // this guard an empty input (n == 0) would evaluate 0 / 0.0 and poison the sum with NaN.
        GTcrossEntropy += ((classWeight / (double) n) * e);
      }
    }
    if (numClasses > 1) {
      GTcrossEntropy /= -1 * Math.log10(numClasses);
    }
    addValue("GT cross entropy", 1 - GTcrossEntropy);
    logger.debug("GT cross entropy: {}", 1 - GTcrossEntropy);

    double homogeneity;
    if (FCentropy == 0) {
      homogeneity = 1;
    } else {
      homogeneity = 1 - FCcrossEntropy / FCentropy;
    }

    // TODO set err values for now, needs to be debugged
    boolean homogeneityOutOfRange = homogeneity > 1 || homogeneity < 0;
    if (homogeneityOutOfRange) {
      addValue("Homogeneity", -1);
    } else {
      addValue("Homogeneity", homogeneity);
    }

    double completeness;
    if (GTentropy == 0) {
      completeness = 1;
    } else {
      completeness = 1 - GTcrossEntropy / GTentropy;
    }
    addValue("Completeness", completeness);

    // Weighted harmonic mean of homogeneity and completeness.
    double beta = 1;
    double vNumerator = (1 + beta) * homogeneity * completeness;
    // A zero numerator yields 0 directly; this avoids 0 / 0 = NaN when both terms are 0.
    double vmeasure = (vNumerator == 0) ? 0 : vNumerator / (beta * homogeneity + completeness);

    // Report the error value whenever the score itself or the homogeneity it is built from
    // lies outside [0, 1], so the two published values never contradict each other.
    if (homogeneityOutOfRange || vmeasure > 1 || vmeasure < 0) {
      addValue("V-Measure", -1);
    } else {
      addValue("V-Measure", vmeasure);
    }

    // Mutual information between cluster and class assignment.
    double mutual = 0;
    for (int i = 0; i < numCluster; i++) {
      for (int j = 0; j < numClasses; j++) {
        double joint = mm.getClusterClassWeight(i, j);
        if (joint == 0) {
          continue;
        }
        double m = Math.log10(joint / (double) mm.getClusterSum(i)
            / (double) mm.getClassSum(j) * (double) n);
        m *= joint / (double) n;
        logger.debug("({} / {}): {}", i, j, m);
        mutual += m;
      }
    }
    if (numClasses > 1) {
      mutual /= Math.log10(numClasses);
    }

    // Twice the mutual information relative to the sum of both entropies; stays at 1 when
    // both entropies are 0.
    double varInfo = 1;
    if (FCentropy + GTentropy > 0) {
      varInfo = 2 * mutual / (FCentropy + GTentropy);
    }

    logger.debug("mutual: {} / VI: {}", mutual, varInfo);
    addValue("VarInformation", varInfo);

  }