public Clustering()

in samoa-api/src/main/java/org/apache/samoa/moa/cluster/Clustering.java [84:168]


  /**
   * Builds a clustering out of labeled points by grouping them per class label, greedily growing
   * sphere-shaped micro-clusters inside each class, and merging micro-clusters of the same class
   * whose overlap exceeds a threshold.
   *
   * <p>Points whose class value is {@code -1} are ignored (presumably they mark noise; confirm
   * against {@code classValues}). Every resulting cluster gets the class value of its group as
   * ground truth and a sequential id equal to its position in {@code clusters}.
   *
   * @param points           labeled points to cluster; an empty list yields an empty clustering
   * @param overlapThreshold two micro-clusters of the same class are merged when their
   *                         {@code overlapRadiusDegree} is strictly greater than this value
   * @param initMinPoints    number of points used to seed each new micro-cluster; values below 1
   *                         are treated as 1 so that seeding always makes progress
   */
  public Clustering(ArrayList<DataPoint> points, double overlapThreshold, int initMinPoints) {
    // Without any point there is no dataset to read the dimensionality from.
    if (points.isEmpty()) {
      clusters = new AutoExpandVector<>();
      return;
    }

    HashMap<Integer, Integer> labelMap = Clustering.classValues(points);
    // One attribute is not counted as a dimension (presumably the class attribute).
    int dim = points.get(0).dataset().numAttributes() - 1;

    int numClasses = labelMap.size();

    // A seed size below 1 would never consume a point and the seeding loop would spin forever.
    final int seedSize = Math.max(1, initMinPoints);
    // Minimum inclusion probability for a remaining point to be absorbed by a fresh micro-cluster.
    final double inclusionThreshold = 0.8;

    // Bucket the points by the index that labelMap assigns to their class label.
    @SuppressWarnings("unchecked") // generic array creation; only ArrayList<DataPoint> is stored
    ArrayList<DataPoint>[] sorted_points = (ArrayList<DataPoint>[]) new ArrayList[numClasses];
    for (int i = 0; i < numClasses; i++) {
      sorted_points[i] = new ArrayList<>();
    }

    for (DataPoint point : points) {
      int clusterId = (int) point.classValue();
      if (clusterId != -1) {
        sorted_points[labelMap.get(clusterId)].add(point);
      }
    }

    clusters = new AutoExpandVector<>();
    for (int i = 0; i < numClasses; i++) {
      // microByClass and pointInMicroClusters are parallel lists: entry c of the latter holds the
      // points that make up micro-cluster c of the former.
      ArrayList<SphereCluster> microByClass = new ArrayList<>();
      ArrayList<ArrayList<Instance>> pointInMicroClusters = new ArrayList<>();
      // Working copy of the class' points; it is drained as points get assigned.
      ArrayList<DataPoint> pointInCluster = new ArrayList<>(sorted_points[i]);

      while (!pointInCluster.isEmpty()) {
        // Seed a new micro-cluster with the next (at most seedSize) unassigned points. Because
        // the list is non-empty and seedSize >= 1, at least one point is always taken.
        int take = Math.min(seedSize, pointInCluster.size());
        ArrayList<Instance> micro_points = new ArrayList<>(pointInCluster.subList(0, take));
        pointInCluster.subList(0, take).clear();

        SphereCluster s = new SphereCluster(micro_points, dim);

        // Swallow every existing micro-cluster that overlaps the new one too much. The index only
        // advances when nothing was removed, so the entry that shifts into slot c is examined too.
        int c = 0;
        while (c < microByClass.size()) {
          if (microByClass.get(c).overlapRadiusDegree(s) > overlapThreshold) {
            micro_points.addAll(pointInMicroClusters.get(c));
            s = new SphereCluster(micro_points, dim);
            pointInMicroClusters.remove(c);
            microByClass.remove(c);
          } else {
            c++;
          }
        }

        // Absorb unassigned points that the sphere very likely contains. As above, the index
        // stays put after a removal so that no point is skipped.
        int j = 0;
        while (j < pointInCluster.size()) {
          Instance instance = pointInCluster.get(j);
          if (s.getInclusionProbability(instance) > inclusionThreshold) {
            pointInCluster.remove(j);
            micro_points.add(instance);
          } else {
            j++;
          }
        }

        s.setWeight(micro_points.size());
        microByClass.add(s);
        pointInMicroClusters.add(micro_points);
      }

      // Repeat full pairwise passes until no pair of micro-clusters overlaps beyond the
      // threshold. After a merge only the inner loop is left; the flag forces another pass.
      boolean changed = true;
      while (changed) {
        changed = false;
        for (int c = 0; c < microByClass.size(); c++) {
          for (int c1 = c + 1; c1 < microByClass.size(); c1++) {
            double overlap = microByClass.get(c).overlapRadiusDegree(microByClass.get(c1));
            if (overlap > overlapThreshold) {
              pointInMicroClusters.get(c).addAll(pointInMicroClusters.get(c1));
              SphereCluster s = new SphereCluster(pointInMicroClusters.get(c), dim);
              microByClass.set(c, s);
              pointInMicroClusters.remove(c1);
              microByClass.remove(c1);
              changed = true;
              break;
            }
          }
        }
      }

      // microByClass is non-empty only if this class had points, so get(0) is safe here.
      for (SphereCluster microByClas : microByClass) {
        microByClas.setGroundTruth(sorted_points[i].get(0).classValue());
        clusters.add(microByClas);
      }
    }

    // Ids follow the final position of each cluster.
    for (int j = 0; j < clusters.size(); j++) {
      clusters.get(j).setId(j);
    }

  }