std::unique_ptr<Index> index_factory_sub()

in faiss/index_factory.cpp [514:692]


std::unique_ptr<Index> index_factory_sub(
        int d,
        std::string description,
        MetricType metric) {
    // handle composite indexes

    bool verbose = index_factory_verbose;

    if (verbose) {
        printf("begin parse VectorTransforms: %s \n", description.c_str());
    }

    // for the current match
    std::smatch sm;

    // handle refines
    if (re_match(description, "(.+),RFlat", sm) ||
        re_match(description, "(.+),Refine\\((.+)\\)", sm)) {
        std::unique_ptr<Index> filter_index =
                index_factory_sub(d, sm[1].str(), metric);
        std::unique_ptr<Index> refine_index;

        if (sm.size() == 3) { // Refine
            refine_index = index_factory_sub(d, sm[2].str(), metric);
        } else { // RFlat
            refine_index.reset(new IndexFlat(d, metric));
        }
        IndexRefine* index_rf =
                new IndexRefine(filter_index.get(), refine_index.get());
        index_rf->own_fields = true;
        filter_index.release();
        refine_index.release();
        index_rf->own_refine_index = true;
        return std::unique_ptr<Index>(index_rf);
    }

    // IndexPreTransform
    // should handle this first (even before parentheses) because it changes d
    std::vector<std::unique_ptr<VectorTransform>> vts;
    VectorTransform* vt = nullptr;
    while (re_match(description, "([^,]+),(.*)", sm) &&
           (vt = parse_VectorTransform(sm[1], d))) {
        // reset loop
        description = sm[sm.size() - 1];
        vts.emplace_back(vt);
        d = vts.back()->d_out;
    }

    if (vts.size() > 0) {
        std::unique_ptr<Index> sub_index =
                index_factory_sub(d, description, metric);
        IndexPreTransform* index_pt = new IndexPreTransform(sub_index.get());
        std::unique_ptr<Index> ret(index_pt);
        index_pt->own_fields = true;
        sub_index.release();
        while (vts.size() > 0) {
            if (verbose) {
                printf("prepend trans %d -> %d\n",
                       vts.back()->d_in,
                       vts.back()->d_out);
            }
            index_pt->prepend_transform(vts.back().release());
            vts.pop_back();
        }
        return ret;
    }

    // what we got from the parentheses
    std::vector<std::unique_ptr<Index>> parenthesis_indexes;

    int begin = 0;
    while (description.find('(', begin) != std::string::npos) {
        // replace indexes in () with Index0, Index1, etc.
        int i0, i1;
        find_matching_parentheses(description, i0, i1, begin);
        std::string sub_description = description.substr(i0 + 1, i1 - i0 - 1);
        int no = parenthesis_indexes.size();
        parenthesis_indexes.push_back(
                index_factory_sub(d, sub_description, metric));
        description = description.substr(0, i0 + 1) + "Index" +
                std::to_string(no) + description.substr(i1);
        begin = i1 + 1;
    }

    if (verbose) {
        printf("after () normalization: %s %ld parenthesis indexes d=%d\n",
               description.c_str(),
               parenthesis_indexes.size(),
               d);
    }

    // IndexIDMap -- it turns out is was used both as a prefix and a suffix, so
    // support both
    if (re_match(description, "(.+),IDMap", sm) ||
        re_match(description, "IDMap,(.+)", sm)) {
        IndexIDMap* idmap = new IndexIDMap(
                index_factory_sub(d, sm[1].str(), metric).release());
        idmap->own_fields = true;
        return std::unique_ptr<Index>(idmap);
    }

    { // handle basic index types
        Index* index = parse_other_indexes(description, d, metric);
        if (index) {
            return std::unique_ptr<Index>(index);
        }
    }

    // HNSW variants (it was unclear in the old version that the separator was a
    // "," so we support both "_" and ",")
    if (re_match(description, "HNSW([0-9]*)([,_].*)?", sm)) {
        int hnsw_M = mres_to_int(sm[1], 32);
        // We also accept empty code string (synonym of Flat)
        std::string code_string =
                sm[2].length() > 0 ? sm[2].str().substr(1) : "";
        if (verbose) {
            printf("parsing HNSW string %s code_string=%s hnsw_M=%d\n",
                   description.c_str(),
                   code_string.c_str(),
                   hnsw_M);
        }

        IndexHNSW* index = parse_IndexHNSW(code_string, d, metric, hnsw_M);
        FAISS_THROW_IF_NOT_FMT(
                index,
                "could not parse HNSW code description %s in %s",
                code_string.c_str(),
                description.c_str());
        return std::unique_ptr<Index>(index);
    }

    // IndexIVF
    {
        size_t nlist;
        bool use_2layer;
        size_t comma = description.find(",");
        std::string coarse_string = description.substr(0, comma);
        // Match coarse quantizer part first
        std::unique_ptr<Index> quantizer(parse_coarse_quantizer(
                description.substr(0, comma),
                d,
                metric,
                parenthesis_indexes,
                nlist,
                use_2layer));

        if (comma != std::string::npos && quantizer.get()) {
            std::string code_description = description.substr(comma + 1);
            if (use_2layer) {
                bool ok =
                        re_match(code_description, "PQ([0-9]+)(x[0-9]+)?", sm);
                FAISS_THROW_IF_NOT_FMT(
                        ok,
                        "could not parse 2 layer code description %s in %s",
                        code_description.c_str(),
                        description.c_str());
                int M = std::stoi(sm[1].str()), nbit = mres_to_int(sm[2], 8, 1);
                Index2Layer* index_2l =
                        new Index2Layer(quantizer.release(), nlist, M, nbit);
                index_2l->q1.own_fields = true;
                index_2l->q1.quantizer_trains_alone =
                        get_trains_alone(index_2l->q1.quantizer);
                return std::unique_ptr<Index>(index_2l);
            }

            IndexIVF* index_ivf =
                    parse_IndexIVF(code_description, quantizer, nlist, metric);

            FAISS_THROW_IF_NOT_FMT(
                    index_ivf,
                    "could not parse code description %s in %s",
                    code_description.c_str(),
                    description.c_str());
            return std::unique_ptr<Index>(fix_ivf_fields(index_ivf));
        }
    }
    FAISS_THROW_FMT("could not parse index string %s", description.c_str());
    return nullptr;
}