in faiss/index_factory.cpp [514:692]
std::unique_ptr<Index> index_factory_sub(
int d,
std::string description,
MetricType metric) {
// handle composite indexes
bool verbose = index_factory_verbose;
if (verbose) {
printf("begin parse VectorTransforms: %s \n", description.c_str());
}
// for the current match
std::smatch sm;
// handle refines
if (re_match(description, "(.+),RFlat", sm) ||
re_match(description, "(.+),Refine\\((.+)\\)", sm)) {
std::unique_ptr<Index> filter_index =
index_factory_sub(d, sm[1].str(), metric);
std::unique_ptr<Index> refine_index;
if (sm.size() == 3) { // Refine
refine_index = index_factory_sub(d, sm[2].str(), metric);
} else { // RFlat
refine_index.reset(new IndexFlat(d, metric));
}
IndexRefine* index_rf =
new IndexRefine(filter_index.get(), refine_index.get());
index_rf->own_fields = true;
filter_index.release();
refine_index.release();
index_rf->own_refine_index = true;
return std::unique_ptr<Index>(index_rf);
}
// IndexPreTransform
// should handle this first (even before parentheses) because it changes d
std::vector<std::unique_ptr<VectorTransform>> vts;
VectorTransform* vt = nullptr;
while (re_match(description, "([^,]+),(.*)", sm) &&
(vt = parse_VectorTransform(sm[1], d))) {
// reset loop
description = sm[sm.size() - 1];
vts.emplace_back(vt);
d = vts.back()->d_out;
}
if (vts.size() > 0) {
std::unique_ptr<Index> sub_index =
index_factory_sub(d, description, metric);
IndexPreTransform* index_pt = new IndexPreTransform(sub_index.get());
std::unique_ptr<Index> ret(index_pt);
index_pt->own_fields = true;
sub_index.release();
while (vts.size() > 0) {
if (verbose) {
printf("prepend trans %d -> %d\n",
vts.back()->d_in,
vts.back()->d_out);
}
index_pt->prepend_transform(vts.back().release());
vts.pop_back();
}
return ret;
}
// what we got from the parentheses
std::vector<std::unique_ptr<Index>> parenthesis_indexes;
int begin = 0;
while (description.find('(', begin) != std::string::npos) {
// replace indexes in () with Index0, Index1, etc.
int i0, i1;
find_matching_parentheses(description, i0, i1, begin);
std::string sub_description = description.substr(i0 + 1, i1 - i0 - 1);
int no = parenthesis_indexes.size();
parenthesis_indexes.push_back(
index_factory_sub(d, sub_description, metric));
description = description.substr(0, i0 + 1) + "Index" +
std::to_string(no) + description.substr(i1);
begin = i1 + 1;
}
if (verbose) {
printf("after () normalization: %s %ld parenthesis indexes d=%d\n",
description.c_str(),
parenthesis_indexes.size(),
d);
}
// IndexIDMap -- it turns out is was used both as a prefix and a suffix, so
// support both
if (re_match(description, "(.+),IDMap", sm) ||
re_match(description, "IDMap,(.+)", sm)) {
IndexIDMap* idmap = new IndexIDMap(
index_factory_sub(d, sm[1].str(), metric).release());
idmap->own_fields = true;
return std::unique_ptr<Index>(idmap);
}
{ // handle basic index types
Index* index = parse_other_indexes(description, d, metric);
if (index) {
return std::unique_ptr<Index>(index);
}
}
// HNSW variants (it was unclear in the old version that the separator was a
// "," so we support both "_" and ",")
if (re_match(description, "HNSW([0-9]*)([,_].*)?", sm)) {
int hnsw_M = mres_to_int(sm[1], 32);
// We also accept empty code string (synonym of Flat)
std::string code_string =
sm[2].length() > 0 ? sm[2].str().substr(1) : "";
if (verbose) {
printf("parsing HNSW string %s code_string=%s hnsw_M=%d\n",
description.c_str(),
code_string.c_str(),
hnsw_M);
}
IndexHNSW* index = parse_IndexHNSW(code_string, d, metric, hnsw_M);
FAISS_THROW_IF_NOT_FMT(
index,
"could not parse HNSW code description %s in %s",
code_string.c_str(),
description.c_str());
return std::unique_ptr<Index>(index);
}
// IndexIVF
{
size_t nlist;
bool use_2layer;
size_t comma = description.find(",");
std::string coarse_string = description.substr(0, comma);
// Match coarse quantizer part first
std::unique_ptr<Index> quantizer(parse_coarse_quantizer(
description.substr(0, comma),
d,
metric,
parenthesis_indexes,
nlist,
use_2layer));
if (comma != std::string::npos && quantizer.get()) {
std::string code_description = description.substr(comma + 1);
if (use_2layer) {
bool ok =
re_match(code_description, "PQ([0-9]+)(x[0-9]+)?", sm);
FAISS_THROW_IF_NOT_FMT(
ok,
"could not parse 2 layer code description %s in %s",
code_description.c_str(),
description.c_str());
int M = std::stoi(sm[1].str()), nbit = mres_to_int(sm[2], 8, 1);
Index2Layer* index_2l =
new Index2Layer(quantizer.release(), nlist, M, nbit);
index_2l->q1.own_fields = true;
index_2l->q1.quantizer_trains_alone =
get_trains_alone(index_2l->q1.quantizer);
return std::unique_ptr<Index>(index_2l);
}
IndexIVF* index_ivf =
parse_IndexIVF(code_description, quantizer, nlist, metric);
FAISS_THROW_IF_NOT_FMT(
index_ivf,
"could not parse code description %s in %s",
code_description.c_str(),
description.c_str());
return std::unique_ptr<Index>(fix_ivf_fields(index_ivf));
}
}
FAISS_THROW_FMT("could not parse index string %s", description.c_str());
return nullptr;
}