in samoa-api/src/main/java/org/apache/samoa/moa/clusterers/clustream/Clustream.java [80:178]
/**
 * Feeds a single instance into the micro-cluster (kernel) model.
 *
 * <p>The method advances {@code timestamp} and then runs one of these phases:
 * <ol start="0">
 * <li>While not yet initialized, instances are buffered; once the buffer holds
 * {@code bufferSize} entries the kernels are seeded via {@code kMeans} and the
 * buffer is cleared.</li>
 * <li>The kernel whose center is closest to the instance is located.</li>
 * <li>If the instance lies within that kernel's radius it is inserted there.</li>
 * <li>Otherwise room is made for a new kernel built from the instance: either a
 * kernel whose relevance stamp is older than {@code timestamp - timeWindow} is
 * replaced, or the two closest kernels are merged and the freed slot is reused.</li>
 * </ol>
 *
 * <p>If no closest kernel can be determined (no kernel yields a distance below
 * {@code Double.MAX_VALUE}, which includes NaN distances), the instance is treated
 * as fitting no kernel and handled by phase 3 instead of failing with a
 * {@code NullPointerException}.
 *
 * @param instance the instance to learn from
 */
public void trainOnInstanceImpl(Instance instance) {
  int dim = instance.numValues();
  timestamp++;

  // 0. Initialize
  if (!initialized) {
    if (buffer.size() < bufferSize) {
      buffer.add(new ClustreamKernel(instance, dim, timestamp, t, m));
      return;
    }
    // NOTE(review): the instance that arrives once the buffer is full triggers the
    // seeding below but is itself neither buffered nor inserted -- confirm this
    // is intended.
    int k = kernels.length;
    assert (k <= bufferSize);

    ClustreamKernel[] centers = new ClustreamKernel[k];
    for (int i = 0; i < k; i++) {
      centers[i] = buffer.get(i); // TODO: make random!
    }
    Clustering kmeansClustering = kMeans(k, centers, buffer);
    // NOTE(review): the kernels are built from centers[i], not from the returned
    // clustering. This only reflects the k-means result if kMeans updates the
    // passed-in array in place -- verify against kMeans. The loop bound also
    // presumes the clustering has exactly k entries.
    for (int i = 0; i < kmeansClustering.size(); i++) {
      kernels[i] = new ClustreamKernel(new DenseInstance(1.0, centers[i].getCenter()), dim, timestamp, t, m);
    }
    buffer.clear();
    initialized = true;
    return;
  }

  // 1. Determine closest kernel
  // The instance's coordinates do not change while scanning, so fetch them once.
  double[] point = instance.toDoubleArray();
  ClustreamKernel closestKernel = null;
  double minDistance = Double.MAX_VALUE;
  for (ClustreamKernel kernel : kernels) {
    double dist = distance(point, kernel.getCenter());
    if (dist < minDistance) {
      closestKernel = kernel;
      minDistance = dist;
    }
  }

  // 2. Check whether instance fits into closestKernel
  // closestKernel stays null when no distance compared below Double.MAX_VALUE
  // (comparisons against NaN are always false); in that case nothing can absorb
  // the instance and we go straight to step 3.
  if (closestKernel != null) {
    double radius;
    if (closestKernel.getWeight() == 1) {
      // Special case: estimate radius by determining the distance to the
      // next closest cluster
      radius = Double.MAX_VALUE;
      double[] center = closestKernel.getCenter();
      for (ClustreamKernel kernel : kernels) {
        if (kernel == closestKernel) {
          continue;
        }
        radius = Math.min(distance(kernel.getCenter(), center), radius);
      }
    } else {
      radius = closestKernel.getRadius();
    }

    if (minDistance < radius) {
      // Data fits, put into kernel and be happy
      closestKernel.insert(instance, timestamp);
      return;
    }
  }

  // 3. Data does not fit, we need to free
  // some space to insert a new kernel
  long threshold = timestamp - timeWindow; // Kernels before this can be forgotten

  // 3.1 Try to forget old kernels
  for (int i = 0; i < kernels.length; i++) {
    if (kernels[i].getRelevanceStamp() < threshold) {
      kernels[i] = new ClustreamKernel(instance, dim, timestamp, t, m);
      return;
    }
  }

  // 3.2 Merge closest two kernels
  int closestA = 0;
  int closestB = 0;
  minDistance = Double.MAX_VALUE;
  for (int i = 0; i < kernels.length; i++) {
    double[] centerA = kernels[i].getCenter();
    for (int j = i + 1; j < kernels.length; j++) {
      double dist = distance(centerA, kernels[j].getCenter());
      if (dist < minDistance) {
        minDistance = dist;
        closestA = i;
        closestB = j;
      }
    }
  }
  assert (closestA != closestB);

  // Fold B into A, then reuse B's slot for a kernel built from the new instance.
  kernels[closestA].add(kernels[closestB]);
  kernels[closestB] = new ClustreamKernel(instance, dim, timestamp, t, m);
}