public void evaluateClustering()

in samoa-api/src/main/java/org/apache/samoa/evaluation/measures/F1.java [36:108]


  /**
   * Computes the "F1-P", "Purity" and "F1-R" measures for a clustering and records each of them
   * through {@code addValue}.
   *
   * <ul>
   * <li><b>F1-P</b>: for every cluster, the class with the largest weight in that cluster is
   * picked and the F1 of (cluster, class) is computed; the result is the average over all
   * clusters that have a positive weight for at least one class. The per-cluster value is also
   * stored on the cluster under the measure key "F1-P".</li>
   * <li><b>Purity</b>: the average precision of those same (cluster, dominant class) pairs.</li>
   * <li><b>F1-R</b>: for every class, the best F1 achieved by any cluster; averaged over the
   * classes.</li>
   * </ul>
   *
   * <p>
   * An empty clustering, or a membership matrix without any (non-noise) class, yields 0 for the
   * affected measures rather than NaN.
   *
   * @param clustering
   *          the clustering to evaluate; its clusters receive the per-cluster "F1-P" value
   * @param trueClustering
   *          not used by this measure
   * @param points
   *          the data points handed to the {@code MembershipMatrix} together with the clustering
   */
  public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) {

    final int numClusters = clustering.size();

    // Nothing to score without clusters: record zero for every measure, using the same names and
    // order as the regular path so that all measure series stay aligned.
    if (numClusters <= 0) {
      addValue("F1-P", 0.0);
      addValue("Purity", 0.0);
      addValue("F1-R", 0.0);
      return;
    }

    MembershipMatrix mm = new MembershipMatrix(clustering, points);

    // Leave the noise class out of the evaluation.
    // NOTE(review): this assumes the noise class occupies the last class index - confirm against
    // MembershipMatrix.
    int numClasses = mm.getNumClasses();
    if (mm.hasNoiseClass()) {
      numClasses--;
    }

    // F1 as defined in P3C, try using F1 optimization
    double f1P = 0.0;
    double purity = 0.0;
    int realClusters = 0;
    for (int i = 0; i < numClusters; i++) {
      // Find the class that dominates cluster i; stays at -1 when no class has positive weight.
      int maxWeight = 0;
      int maxWeightIndex = -1;
      for (int j = 0; j < numClasses; j++) {
        int weight = mm.getClusterClassWeight(i, j);
        if (weight > maxWeight) {
          maxWeight = weight;
          maxWeightIndex = j;
        }
      }

      if (maxWeightIndex != -1) {
        realClusters++;
        double precision = maxWeight / (double) mm.getClusterSum(i);
        double recall = maxWeight / (double) mm.getClassSum(maxWeightIndex);
        double f1 = harmonicMean(precision, recall);
        f1P += f1;
        purity += precision;

        // TODO should we move setMeasure stuff into the Cluster interface?
        clustering.get(i).setMeasureValue("F1-P", Double.toString(f1));
      }
    }
    if (realClusters > 0) {
      f1P /= realClusters;
      purity /= realClusters;
    }
    addValue("F1-P", f1P);
    addValue("Purity", purity);

    // F1 as defined in .... mainly maximizes F1 for each class
    double f1R = 0.0;
    for (int j = 0; j < numClasses; j++) {
      double maxF1 = 0.0;
      for (int i = 0; i < numClusters; i++) {
        double precision = mm.getClusterClassWeight(i, j) / (double) mm.getClusterSum(i);
        double recall = mm.getClusterClassWeight(i, j) / (double) mm.getClassSum(j);
        double f1 = harmonicMean(precision, recall);
        if (maxF1 < f1) {
          maxF1 = f1;
        }
      }
      f1R += maxF1;
    }
    // Without any class the sum is 0 and dividing would produce NaN (0.0 / 0); keep 0 instead.
    if (numClasses > 0) {
      f1R /= numClasses;
    }

    addValue("F1-R", f1R);
  }

  /**
   * Returns the F1 score (harmonic mean) of a precision/recall pair.
   *
   * <p>
   * The result is 0 when the sum of both values is not positive. Because every comparison with
   * NaN is false, this also covers a NaN precision or recall coming from an empty cluster or
   * class (0 / 0.0).
   */
  private static double harmonicMean(double precision, double recall) {
    double sum = precision + recall;
    if (sum > 0) {
      return 2 * precision * recall / sum;
    }
    return 0.0;
  }