private void calculateMatching()

in samoa-api/src/main/java/org/apache/samoa/evaluation/measures/CMM.java [144:273]


  /**
   * Maps every found cluster to a ground truth class and stores the result in
   * {@code matchMap} ({@code -1} when no class could be assigned).
   * <p>
   * The method first writes the inclusion probability of each point in each
   * found cluster to {@code pointInclusionProbFC} and counts class frequency
   * profiles for the found clusters and for the ground truth clusters. A found
   * cluster whose counted points all share a single class is matched to that
   * class directly. Otherwise the ground truth profile that the found
   * cluster's profile fits into is chosen; if no profile fits, the one that is
   * exceeded the least is used. The outcome is also attached to each found
   * cluster as the measure values "CMM Match" and "CMM Workclass", and, when
   * {@code debug} is set, a matching table is printed to standard output.
   */
  private void calculateMatching() {
    // class frequencies of the found clusters: foundFreq[foundCluster][class]
    int[][] foundFreq = new int[numFClusters][numGT0Classes];
    // class frequencies of the ground truth clusters: truthFreq[gtCluster][class]
    int[][] truthFreq = new int[numGT0Classes][numGT0Classes];
    // number of points counted into each found cluster's profile
    int[] foundTotals = new int[numFClusters];

    // fuzzy mapping of points to found clusters, plus both frequency tables
    pointInclusionProbFC = new double[numPoints][numFClusters];
    for (int pointIdx = 0; pointIdx < numPoints; pointIdx++) {
      CMMPoint point = gtAnalysis.getPoint(pointIdx);

      for (int fcIdx = 0; fcIdx < numFClusters; fcIdx++) {
        Cluster found = clustering.get(fcIdx);
        double prob = found.getInclusionProbability(point);
        pointInclusionProbFC[pointIdx][fcIdx] = prob;
        // the probability is always recorded, but noise points never enter
        // the frequency profile of a found cluster
        if (prob >= pointInclusionProbThreshold && !point.isNoise()) {
          foundFreq[fcIdx][point.workclass()]++;
          foundTotals[fcIdx]++;
        }
      }

      if (!point.isNoise()) {
        for (int gt = 0; gt < numGT0Classes; gt++) {
          if (gt != point.workclass()) {
            // a foreign ground truth cluster only counts the point when its
            // inclusion probability for the point is at least 1
            if (gtAnalysis.getGT0Cluster(gt).getInclusionProbability(point) >= 1) {
              truthFreq[gt][point.workclass()]++;
            }
          } else {
            truthFreq[gt][gt]++;
          }
        }
      }
    }

    // assign each found cluster to a ground truth class
    matchMap = new int[numFClusters];
    for (int fcIdx = 0; fcIdx < numFClusters; fcIdx++) {
      int[] profile = foundFreq[fcIdx];

      // shortcut: a profile with exactly one occupied class matches that class;
      // scanning stops as soon as a second occupied class shows up
      int occupied = 0;
      int lastOccupied = -1;
      for (int cls = 0; cls < numGT0Classes && occupied < 2; cls++) {
        if (profile[cls] != 0) {
          occupied++;
          lastOccupied = cls;
        }
      }
      int matchIndex = (occupied == 1) ? lastOccupied : -1;

      // several occupied classes: compare against every ground truth profile
      if (matchIndex == -1 && foundTotals[fcIdx] != 0) {
        int smallestExcess = Integer.MAX_VALUE;
        int bestFit = -1;
        for (int gt = 0; gt < numGT0Classes; gt++) {
          // total amount by which the found profile sticks out of the
          // ground truth profile
          int excess = 0;
          for (int cls = 0; cls < numGT0Classes; cls++) {
            excess += Math.max(0, profile[cls] - truthFreq[gt][cls]);
          }
          // among the profiles that fit completely, prefer the one whose own
          // class holds the most points of the found cluster (first one wins
          // on ties)
          if (excess == 0 && (bestFit == -1 || profile[gt] > profile[bestFit])) {
            bestFit = gt;
          }
          // fallback choice: the first profile with the smallest excess
          if (excess < smallestExcess) {
            smallestExcess = excess;
            matchIndex = gt;
          }
        }
        // a completely fitting profile overrides the minimum-excess choice
        if (bestFit != -1) {
          matchIndex = bestFit;
        }
      }

      matchMap[fcIdx] = matchIndex;
      int realMatch = -1;
      if (matchIndex != -1) {
        realMatch = gtAnalysis.getGT0Cluster(matchIndex).getLabel();
      } else if (debug) {
        System.out.println("No cluster match: needs to be implemented?");
      }
      clustering.get(fcIdx).setMeasureValue("CMM Match", "C" + realMatch);
      clustering.get(fcIdx).setMeasureValue("CMM Workclass", "C" + matchMap[fcIdx]);
    }

    if (!debug) {
      return;
    }

    // print matching table: one row per found cluster
    for (int row = 0; row < numFClusters; row++) {
      System.out.print("C" + ((int) clustering.get(row).getId()) + " N:" + ((int) clustering.get(row).getWeight())
          + "  |  ");
      for (int count : foundFreq[row]) {
        System.out.print(count + " ");
      }
      System.out.print(" = " + foundTotals[row] + " | ");
      String match = (matchMap[row] == -1)
          ? "-"
          : Integer.toString(gtAnalysis.getGT0Cluster(matchMap[row]).getLabel());
      System.out.println(" --> " + match + "(work:" + matchMap[row] + ")");
    }
  }