private void calculateError()

in samoa-api/src/main/java/org/apache/samoa/evaluation/measures/CMM.java [278:390]


  /**
   * Computes the error of every ground-truth point and publishes the aggregated CMM measures.
   *
   * <p>For each point of {@code gtAnalysis} the method takes the largest error over all found clusters whose
   * inclusion probability for the point reaches {@code pointInclusionProbThreshold}: {@code noiseError} for a noise
   * point, {@code misplacedError} for a non-noise point in a cluster whose {@code matchMap} entry differs from the
   * point's work class. A non-noise point that is covered by no cluster gets {@code missedError} instead. The
   * resulting error and the number of covering clusters are stored on the point as the measure values "CMM" and
   * "Redundancy".
   *
   * <p>The weighted sums are reported through {@code addValue} as "CMM", "CMM Missed", "CMM Misplaced" and
   * "CMM Noise", each computed as {@code 1 - error / maxError}, plus "CMM Basic", which is one minus the fraction of
   * points with a non-zero error. Whenever a denominator is zero the reported value is 1.
   */
  private void calculateError() {
    int totalErrorCount = 0;

    double errorNoise = 0;
    double errorNoiseMax = 0;

    double errorMissed = 0;
    double errorMissedMax = 0;

    double errorMisplaced = 0;
    double errorMisplacedMax = 0;

    double totalError = 0.0;
    double totalErrorMax = 0.0;

    // Single pass over all points: determine each point's error and accumulate the weighted sums.
    for (int p = 0; p < numPoints; p++) {
      CMMPoint cmdp = gtAnalysis.getPoint(p);
      double weight = cmdp.weight();
      // NOTE(review): read once per point; assumes the error helpers below do not change the noise status.
      boolean noise = cmdp.isNoise();

      // Largest weighted error this point can contribute; the individual errors are quality weighted
      // between 0-1, so the maxima are summed separately to normalise the totals afterwards.
      double maxPointError = cmdp.connectivity * weight;
      if (noise) {
        errorNoiseMax += maxPointError;
      } else {
        errorMissedMax += maxPointError;
        errorMisplacedMax += maxPointError;
      }
      totalErrorMax += maxPointError;

      double err = 0;
      int coverage = 0;

      // Check every found cluster and keep the worst error among those that contain the point.
      for (int c = 0; c < numFClusters; c++) {
        if (pointInclusionProbFC[p][c] >= pointInclusionProbThreshold) {
          coverage++;

          if (noise) {
            // NOISE: a noise point ended up inside a cluster
            double errvalue = noiseError(cmdp, c);
            if (errvalue > err) {
              err = errvalue;
            }
          } else if (matchMap[c] != cmdp.workclass()) {
            // MISPLACED: the cluster is matched to a different class than the point's
            double errvalue = misplacedError(cmdp, c);
            if (errvalue > err) {
              err = errvalue;
            }
          }
          // otherwise the point is placed correctly and contributes no error for this cluster
        }
      }

      if (coverage == 0) {
        // MISSED: a non-noise point that no cluster contains; uncovered noise carries no error
        if (!noise) {
          err = missedError(cmdp, true);
          errorMissed += weight * err;
        }
      } else if (noise) {
        errorNoise += err * weight;
      } else {
        errorMisplaced += err * weight;
      }

      totalError += err * weight;
      if (err != 0) {
        totalErrorCount++;
      }

      cmdp.p.setMeasureValue("CMM", err);
      cmdp.p.setMeasureValue("Redundancy", coverage);
    }

    addValue("CMM", (totalErrorMax != 0) ? 1 - totalError / totalErrorMax : 1);
    addValue("CMM Missed", (errorMissedMax != 0) ? 1 - errorMissed / errorMissedMax : 1);
    addValue("CMM Misplaced", (errorMisplacedMax != 0) ? 1 - errorMisplaced / errorMisplacedMax : 1);
    addValue("CMM Noise", (errorNoiseMax != 0) ? 1 - errorNoise / errorNoiseMax : 1);
    // Guarded like the measures above: without points 0/0 would otherwise publish NaN.
    addValue("CMM Basic", (numPoints != 0) ? 1 - ((double) totalErrorCount / (double) numPoints) : 1);

    if (debug) {
      System.out.println("-------------");
    }
  }