public void trainOnInstanceImpl()

in samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java [80:178]


  /**
   * Updates the kernel set with a single instance.
   *
   * <p>While the model is not yet initialized, each instance is stored in {@code buffer} as its
   * own kernel. The first call made once the buffer holds {@code bufferSize} entries runs
   * {@code kMeans} over the buffer to seed {@code kernels}, clears the buffer and marks the model
   * initialized; the instance passed to that call is neither buffered nor inserted.
   *
   * <p>Once initialized, the instance is inserted into the kernel with the closest center if it
   * lies within that kernel's radius. Otherwise a new kernel is created for it, replacing either
   * the first kernel whose relevance stamp is older than {@code timestamp - timeWindow} or, if
   * there is none, the second kernel of the closest pair after that pair has been merged.
   *
   * @param instance the instance to learn from
   */
  public void trainOnInstanceImpl(Instance instance) {
    int dim = instance.numValues();
    timestamp++;
    // 0. Initialize
    if (!initialized) {
      if (buffer.size() < bufferSize) {
        buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m));
        return;
      }

      int k = kernels.length;
      assert (k <= bufferSize);

      // Seed k-means with the first k buffered kernels.
      ClustreamKernel[] centers = new ClustreamKernel[k];
      for (int i = 0; i < k; i++) {
        centers[i] = buffer.get(i); // TODO: make random!
      }
      Clustering kmeans_clustering = kMeans(k, centers, buffer);

      // NOTE(review): the kernels are built from centers[i], not from the clusters held by
      // kmeans_clustering. This only reflects the k-means result if kMeans updates the
      // centers array in place -- confirm against kMeans.
      for (int i = 0; i < kmeans_clustering.size(); i++) {
        kernels[i] = new ClustreamKernel(new DenseInstance(1.0, centers[i].getCenter()), dim, timestamp, t, m);
      }

      buffer.clear();
      initialized = true;
      return;
    }

    // 1. Determine closest kernel
    // The instance's coordinates do not change between iterations, so convert them once.
    double[] point = instance.toDoubleArray();
    ClustreamKernel closestKernel = null;
    double minDistance = Double.MAX_VALUE;
    for (ClustreamKernel kernel : kernels) {
      double distance = distance(point, kernel.getCenter());
      if (distance < minDistance) {
        closestKernel = kernel;
        minDistance = distance;
      }
    }

    // 2. Check whether instance fits into closestKernel
    // closestKernel stays null when no distance compared below Double.MAX_VALUE (for example
    // when every distance is NaN). In that case the instance fits nowhere and we fall through
    // to step 3 instead of dereferencing null.
    if (closestKernel != null) {
      double radius;
      if (closestKernel.getWeight() == 1) {
        // Special case: estimate radius by determining the distance to the
        // next closest cluster
        radius = Double.MAX_VALUE;
        double[] center = closestKernel.getCenter();
        for (ClustreamKernel kernel : kernels) {
          if (kernel == closestKernel) {
            continue;
          }

          double distance = distance(kernel.getCenter(), center);
          radius = Math.min(distance, radius);
        }
      } else {
        radius = closestKernel.getRadius();
      }

      if (minDistance < radius) {
        // Data fits, put into kernel and be happy
        closestKernel.insert(instance, timestamp);
        return;
      }
    }

    // 3. Data does not fit, we need to free
    // some space to insert a new kernel
    long threshold = timestamp - timeWindow; // Kernels before this can be forgotten

    // 3.1 Try to forget old kernels
    for (int i = 0; i < kernels.length; i++) {
      if (kernels[i].getRelevanceStamp() < threshold) {
        kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m);
        return;
      }
    }

    // 3.2 Merge closest two kernels
    int closestA = 0;
    int closestB = 0;
    minDistance = Double.MAX_VALUE;
    for (int i = 0; i < kernels.length; i++) {
      double[] centerA = kernels[i].getCenter();
      for (int j = i + 1; j < kernels.length; j++) {
        double dist = distance(centerA, kernels[j].getCenter());
        if (dist < minDistance) {
          minDistance = dist;
          closestA = i;
          closestB = j;
        }
      }
    }
    assert (closestA != closestB);

    // Fold B into A, then reuse B's slot for a fresh kernel holding the new instance.
    kernels[closestA].add(kernels[closestB]);
    kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m);
  }