in src/productquantizer.cc [86:133]
void ProductQuantizer::MStep(
const real* x0,
real* centroids,
const uint8_t* codes,
int32_t d,
int32_t n) {
std::vector<int32_t> nelts(ksub_, 0);
memset(centroids, 0, sizeof(real) * d * ksub_);
const real* x = x0;
for (auto i = 0; i < n; i++) {
auto k = codes[i];
real* c = centroids + k * d;
for (auto j = 0; j < d; j++) {
c[j] += x[j];
}
nelts[k]++;
x += d;
}
real* c = centroids;
for (auto k = 0; k < ksub_; k++) {
real z = (real)nelts[k];
if (z != 0) {
for (auto j = 0; j < d; j++) {
c[j] /= z;
}
}
c += d;
}
std::uniform_real_distribution<> runiform(0, 1);
for (auto k = 0; k < ksub_; k++) {
if (nelts[k] == 0) {
int32_t m = 0;
while (runiform(rng) * (n - ksub_) >= nelts[m] - 1) {
m = (m + 1) % ksub_;
}
memcpy(centroids + k * d, centroids + m * d, sizeof(real) * d);
for (auto j = 0; j < d; j++) {
int32_t sign = (j % 2) * 2 - 1;
centroids[k * d + j] += sign * eps_;
centroids[m * d + j] -= sign * eps_;
}
nelts[k] = nelts[m] / 2;
nelts[m] -= nelts[k];
}
}
}