in faiss/IndexHNSW.cpp [111:245]
void hnsw_add_vertices(
IndexHNSW& index_hnsw,
size_t n0,
size_t n,
const float* x,
bool verbose,
bool preset_levels = false) {
size_t d = index_hnsw.d;
HNSW& hnsw = index_hnsw.hnsw;
size_t ntotal = n0 + n;
double t0 = getmillisecs();
if (verbose) {
printf("hnsw_add_vertices: adding %zd elements on top of %zd "
"(preset_levels=%d)\n",
n,
n0,
int(preset_levels));
}
if (n == 0) {
return;
}
int max_level = hnsw.prepare_level_tab(n, preset_levels);
if (verbose) {
printf(" max_level = %d\n", max_level);
}
std::vector<omp_lock_t> locks(ntotal);
for (int i = 0; i < ntotal; i++)
omp_init_lock(&locks[i]);
// add vectors from highest to lowest level
std::vector<int> hist;
std::vector<int> order(n);
{ // make buckets with vectors of the same level
// build histogram
for (int i = 0; i < n; i++) {
storage_idx_t pt_id = i + n0;
int pt_level = hnsw.levels[pt_id] - 1;
while (pt_level >= hist.size())
hist.push_back(0);
hist[pt_level]++;
}
// accumulate
std::vector<int> offsets(hist.size() + 1, 0);
for (int i = 0; i < hist.size() - 1; i++) {
offsets[i + 1] = offsets[i] + hist[i];
}
// bucket sort
for (int i = 0; i < n; i++) {
storage_idx_t pt_id = i + n0;
int pt_level = hnsw.levels[pt_id] - 1;
order[offsets[pt_level]++] = pt_id;
}
}
idx_t check_period = InterruptCallback::get_period_hint(
max_level * index_hnsw.d * hnsw.efConstruction);
{ // perform add
RandomGenerator rng2(789);
int i1 = n;
for (int pt_level = hist.size() - 1; pt_level >= 0; pt_level--) {
int i0 = i1 - hist[pt_level];
if (verbose) {
printf("Adding %d elements at level %d\n", i1 - i0, pt_level);
}
// random permutation to get rid of dataset order bias
for (int j = i0; j < i1; j++)
std::swap(order[j], order[j + rng2.rand_int(i1 - j)]);
bool interrupt = false;
#pragma omp parallel if (i1 > i0 + 100)
{
VisitedTable vt(ntotal);
DistanceComputer* dis =
storage_distance_computer(index_hnsw.storage);
ScopeDeleter1<DistanceComputer> del(dis);
int prev_display =
verbose && omp_get_thread_num() == 0 ? 0 : -1;
size_t counter = 0;
#pragma omp for schedule(dynamic)
for (int i = i0; i < i1; i++) {
storage_idx_t pt_id = order[i];
dis->set_query(x + (pt_id - n0) * d);
// cannot break
if (interrupt) {
continue;
}
hnsw.add_with_locks(*dis, pt_level, pt_id, locks, vt);
if (prev_display >= 0 && i - i0 > prev_display + 10000) {
prev_display = i - i0;
printf(" %d / %d\r", i - i0, i1 - i0);
fflush(stdout);
}
if (counter % check_period == 0) {
if (InterruptCallback::is_interrupted()) {
interrupt = true;
}
}
counter++;
}
}
if (interrupt) {
FAISS_THROW_MSG("computation interrupted");
}
i1 = i0;
}
FAISS_ASSERT(i1 == 0);
}
if (verbose) {
printf("Done in %.3f ms\n", getmillisecs() - t0);
}
for (int i = 0; i < ntotal; i++) {
omp_destroy_lock(&locks[i]);
}
}