def handle_Index()

in faiss/python/__init__.py [0:0]


def handle_Index(the_class):
    """Install numpy-friendly wrappers on an index class.

    Every ``replacement_<name>`` function defined below is registered with
    ``replace_method`` for the method ``<name>`` of `the_class`. The
    replacements check array shapes, allocate output arrays when the caller
    does not provide them, and forward raw pointers to ``self.<name>_c``.

    NOTE(review): ``replace_method`` is defined outside this block; it is
    presumably what exposes the original method as ``<name>_c`` -- confirm
    there before renaming any ``replacement_*`` function.

    Input arrays are passed through ``np.ascontiguousarray`` (float32 for
    vectors, int64 for ids) so that non-contiguous views and other numeric
    dtypes are accepted; no copy is made when the array already has the
    required layout. Caller-supplied output buffers are used as they are,
    because results have to be written into them in place.

    Parameters
    ----------
    the_class : type
        Index class to patch in place. ``__getstate__`` and ``__setstate__``
        are also set on it so that instances can be pickled.
    """

    def replacement_add(self, x):
        """Adds vectors to the index.
        The index must be trained before vectors can be added to it.
        The vectors are implicitly numbered in sequence. When `n` vectors are
        added to the index, they are given ids `ntotal`, `ntotal + 1`, ..., `ntotal + n - 1`.

        Parameters
        ----------
        x : array_like
            Vectors to add, shape (n, d) where d is appropriate for the index.
            Converted to a C-contiguous float32 array if needed.
        """
        x = np.ascontiguousarray(x, dtype=np.float32)
        n, d = x.shape
        assert d == self.d
        self.add_c(n, swig_ptr(x))

    def replacement_add_with_ids(self, x, ids):
        """Adds vectors with arbitrary ids to the index (not all indexes support this).
        The index must be trained before vectors can be added to it.
        Vector `i` is stored in `x[i]` and has id `ids[i]`.

        Parameters
        ----------
        x : array_like
            Vectors to add, shape (n, d) where d is appropriate for the index.
            Converted to a C-contiguous float32 array if needed.
        ids : array_like
            Array of ids of size n. Converted to a C-contiguous int64 array
            if needed. Note that `-1` is reserved in result lists to mean
            "not found" so it's better to not use it as an id.
        """
        x = np.ascontiguousarray(x, dtype=np.float32)
        ids = np.ascontiguousarray(ids, dtype=np.int64)
        n, d = x.shape
        assert d == self.d

        assert ids.shape == (n, ), 'not same nb of vectors as ids'
        self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids))

    def replacement_assign(self, x, k, labels=None):
        """Find the k nearest neighbors of the set of vectors x in the index.
        This is the same as the `search` method, but discards the distances.

        Parameters
        ----------
        x : array_like
            Query vectors, shape (n, d) where d is appropriate for the index.
            Converted to a C-contiguous float32 array if needed.
        k : int
            Number of nearest neighbors.
        labels : array_like, optional
            Labels array to store the results, shape (n, k). It is written
            in place, so it is used as passed (no conversion).

        Returns
        -------
        labels: array_like
            Labels of the nearest neighbors, shape (n, k).
            When not enough results are found, the label is set to -1
        """
        x = np.ascontiguousarray(x, dtype=np.float32)
        n, d = x.shape
        assert d == self.d

        if labels is None:
            labels = np.empty((n, k), dtype=np.int64)
        else:
            assert labels.shape == (n, k)

        self.assign_c(n, swig_ptr(x), swig_ptr(labels), k)
        return labels

    def replacement_train(self, x):
        """Trains the index on a representative set of vectors.
        The index must be trained before vectors can be added to it.

        Parameters
        ----------
        x : array_like
            Training vectors, shape (n, d) where d is appropriate for the index.
            Converted to a C-contiguous float32 array if needed.
        """
        x = np.ascontiguousarray(x, dtype=np.float32)
        n, d = x.shape
        assert d == self.d
        self.train_c(n, swig_ptr(x))

    def replacement_search(self, x, k, D=None, I=None):
        """Find the k nearest neighbors of the set of vectors x in the index.

        Parameters
        ----------
        x : array_like
            Query vectors, shape (n, d) where d is appropriate for the index.
            Converted to a C-contiguous float32 array if needed.
        k : int
            Number of nearest neighbors, must be > 0.
        D : array_like, optional
            Distance array to store the result, shape (n, k). Written in
            place, so it is used as passed (no conversion).
        I : array_like, optional
            Labels array to store the results, shape (n, k). Written in
            place, so it is used as passed (no conversion).

        Returns
        -------
        D : array_like
            Distances of the nearest neighbors, shape (n, k). When not enough results are found
            the distance is set to +Inf or -Inf.
        I : array_like
            Labels of the nearest neighbors, shape (n, k).
            When not enough results are found, the label is set to -1
        """
        x = np.ascontiguousarray(x, dtype=np.float32)
        n, d = x.shape
        assert d == self.d

        assert k > 0

        if D is None:
            D = np.empty((n, k), dtype=np.float32)
        else:
            assert D.shape == (n, k)

        if I is None:
            I = np.empty((n, k), dtype=np.int64)
        else:
            assert I.shape == (n, k)

        self.search_c(n, swig_ptr(x), k, swig_ptr(D), swig_ptr(I))
        return D, I

    def replacement_search_and_reconstruct(self, x, k, D=None, I=None, R=None):
        """Find the k nearest neighbors of the set of vectors x in the index,
        and return an approximation of these vectors.

        Parameters
        ----------
        x : array_like
            Query vectors, shape (n, d) where d is appropriate for the index.
            Converted to a C-contiguous float32 array if needed.
        k : int
            Number of nearest neighbors, must be > 0.
        D : array_like, optional
            Distance array to store the result, shape (n, k).
        I : array_like, optional
            Labels array to store the result, shape (n, k).
        R : array_like, optional
            Reconstruction array to store the result, shape (n, k, d).

        Returns
        -------
        D : array_like
            Distances of the nearest neighbors, shape (n, k). When not enough results are found
            the distance is set to +Inf or -Inf.
        I : array_like
            Labels of the nearest neighbors, shape (n, k). When not enough results are found,
            the label is set to -1
        R : array_like
            Approximate (reconstructed) nearest neighbor vectors, shape (n, k, d).
        """
        x = np.ascontiguousarray(x, dtype=np.float32)
        n, d = x.shape
        assert d == self.d

        assert k > 0

        # Output buffers are filled in place: only their shape is checked.
        if D is None:
            D = np.empty((n, k), dtype=np.float32)
        else:
            assert D.shape == (n, k)

        if I is None:
            I = np.empty((n, k), dtype=np.int64)
        else:
            assert I.shape == (n, k)

        if R is None:
            R = np.empty((n, k, d), dtype=np.float32)
        else:
            assert R.shape == (n, k, d)

        self.search_and_reconstruct_c(n, swig_ptr(x),
                                      k, swig_ptr(D),
                                      swig_ptr(I),
                                      swig_ptr(R))
        return D, I, R

    def replacement_remove_ids(self, x):
        """Remove some ids from the index.
        This is a O(ntotal) operation by default, so could be expensive.

        Parameters
        ----------
        x : array_like or faiss.IDSelector
            Either an IDSelector that returns True for vectors to remove, or a
            list of ids to remove (1D sequence, converted to int64). When `x`
            is not an IDSelector, it is wrapped into one.

        Returns
        -------
        n_remove: int
            number of vectors that were removed
        """
        if isinstance(x, IDSelector):
            sel = x
        else:
            # Convert first so that plain Python sequences are accepted too;
            # np.asarray keeps the rank, so the 1D check stays meaningful.
            ids = np.asarray(x, dtype=np.int64)
            assert ids.ndim == 1
            # `ids` stays referenced until remove_ids_c returns, which keeps
            # the buffer handed to the selector alive for the whole call.
            ids = np.ascontiguousarray(ids)
            index_ivf = try_extract_index_ivf(self)
            if index_ivf and index_ivf.direct_map.type == DirectMap.Hashtable:
                sel = IDSelectorArray(ids.size, swig_ptr(ids))
            else:
                sel = IDSelectorBatch(ids.size, swig_ptr(ids))
        return self.remove_ids_c(sel)

    def replacement_reconstruct(self, key, x=None):
        """Approximate reconstruction of one vector from the index.

        Parameters
        ----------
        key : int
            Id of the vector to reconstruct
        x : array_like, optional
            pre-allocated array to store the results, shape (`self.d`, )

        Returns
        -------
        x : array_like
            Reconstructed vector, size `self.d`, `dtype`=float32
        """
        if x is None:
            x = np.empty(self.d, dtype=np.float32)
        else:
            assert x.shape == (self.d, )

        self.reconstruct_c(key, swig_ptr(x))
        return x

    def replacement_reconstruct_n(self, n0, ni, x=None):
        """Approximate reconstruction of vectors `n0` ... `n0 + ni - 1` from the index.
        Missing vectors trigger an exception.

        Parameters
        ----------
        n0 : int
            Id of the first vector to reconstruct
        ni : int
            Number of vectors to reconstruct
        x : array_like, optional
            pre-allocated array to store the results, shape (`ni`, `self.d`)

        Returns
        -------
        x : array_like
            Reconstructed vectors, size (`ni`, `self.d`), `dtype`=float32
        """
        if x is None:
            x = np.empty((ni, self.d), dtype=np.float32)
        else:
            assert x.shape == (ni, self.d)

        self.reconstruct_n_c(n0, ni, swig_ptr(x))
        return x

    def replacement_update_vectors(self, keys, x):
        """Update the vectors associated with the given ids.

        Parameters
        ----------
        keys : array_like
            Ids of the vectors to update, shape (n, ). Converted to a
            C-contiguous int64 array if needed.
        x : array_like
            New vectors, shape (n, d) with d = `self.d`. Converted to a
            C-contiguous float32 array if needed.
        """
        keys = np.ascontiguousarray(keys, dtype=np.int64)
        x = np.ascontiguousarray(x, dtype=np.float32)
        n = keys.size
        assert keys.shape == (n, )
        assert x.shape == (n, self.d)

        self.update_vectors_c(n, swig_ptr(keys), swig_ptr(x))

    # The CPU does not support passed-in output buffers
    def replacement_range_search(self, x, thresh):
        """Search vectors that are within a distance of the query vectors.

        Parameters
        ----------
        x : array_like
            Query vectors, shape (n, d) where d is appropriate for the index.
            Converted to a C-contiguous float32 array if needed.
        thresh : float
            Threshold to select neighbors. All elements within this radius are returned,
            except for maximum inner product indexes, where the elements above the
            threshold are returned

        Returns
        -------
        lims: array_like
            Starting index of the results for each query vector, size n+1.
        D : array_like
            Distances of the nearest neighbors, shape `lims[n]`. The distances for
            query i are in `D[lims[i]:lims[i+1]]`.
        I : array_like
            Labels of nearest neighbors, shape `lims[n]`. The labels for query i
            are in `I[lims[i]:lims[i+1]]`.

        """
        x = np.ascontiguousarray(x, dtype=np.float32)
        n, d = x.shape
        assert d == self.d

        res = RangeSearchResult(n)
        self.range_search_c(n, swig_ptr(x), thresh, res)
        # The result arrays belong to `res`: copy them so that the returned
        # numpy arrays do not depend on `res` once this function returns.
        lims = rev_swig_ptr(res.lims, n + 1).copy()
        nd = int(lims[-1])
        D = rev_swig_ptr(res.distances, nd).copy()
        I = rev_swig_ptr(res.labels, nd).copy()
        return lims, D, I

    def replacement_sa_encode(self, x, codes=None):
        """Encode a set of vectors into fixed-size byte codes.

        Parameters
        ----------
        x : array_like
            Vectors to encode, shape (n, d) with d = `self.d`. Converted to a
            C-contiguous float32 array if needed.
        codes : array_like, optional
            Pre-allocated output array, shape (n, `self.sa_code_size()`).
            Written in place, so it is used as passed (no conversion).

        Returns
        -------
        codes : array_like
            Encoded vectors, shape (n, `self.sa_code_size()`), `dtype`=uint8
            when allocated here.
        """
        x = np.ascontiguousarray(x, dtype=np.float32)
        n, d = x.shape
        assert d == self.d

        if codes is None:
            codes = np.empty((n, self.sa_code_size()), dtype=np.uint8)
        else:
            assert codes.shape == (n, self.sa_code_size())

        self.sa_encode_c(n, swig_ptr(x), swig_ptr(codes))
        return codes

    def replacement_sa_decode(self, codes, x=None):
        """Decode byte codes back into (approximate) vectors.

        Parameters
        ----------
        codes : array_like
            Codes to decode, shape (n, `self.sa_code_size()`), `dtype` uint8.
            Made C-contiguous if needed; the dtype is deliberately not cast,
            so that out-of-range values are never silently wrapped.
        x : array_like, optional
            Pre-allocated output array, shape (n, `self.d`). Written in
            place, so it is used as passed (no conversion).

        Returns
        -------
        x : array_like
            Decoded vectors, shape (n, `self.d`), `dtype`=float32 when
            allocated here.
        """
        codes = np.ascontiguousarray(codes)
        n, cs = codes.shape
        assert cs == self.sa_code_size()

        if x is None:
            x = np.empty((n, self.d), dtype=np.float32)
        else:
            assert x.shape == (n, self.d)

        self.sa_decode_c(n, swig_ptr(codes), swig_ptr(x))
        return x

    def replacement_add_sa_codes(self, codes, ids=None):
        """Add vectors that are given directly as byte codes.

        Parameters
        ----------
        codes : array_like
            Codes to add, shape (n, `self.sa_code_size()`), `dtype` uint8.
            Made C-contiguous if needed; the dtype is not cast.
        ids : array_like, optional
            Ids to associate with the codes, shape (n, ). Converted to a
            C-contiguous int64 array if needed. When omitted, None is
            forwarded to the wrapped method.
        """
        codes = np.ascontiguousarray(codes)
        n, cs = codes.shape
        assert cs == self.sa_code_size()
        ids_ptr = None
        if ids is not None:
            # `ids` keeps the converted array alive while its pointer is used.
            ids = np.ascontiguousarray(ids, dtype=np.int64)
            assert ids.shape == (n,)
            ids_ptr = swig_ptr(ids)
        self.add_sa_codes_c(n, swig_ptr(codes), ids_ptr)

    replace_method(the_class, 'add', replacement_add)
    replace_method(the_class, 'add_with_ids', replacement_add_with_ids)
    replace_method(the_class, 'assign', replacement_assign)
    replace_method(the_class, 'train', replacement_train)
    replace_method(the_class, 'search', replacement_search)
    replace_method(the_class, 'remove_ids', replacement_remove_ids)
    replace_method(the_class, 'reconstruct', replacement_reconstruct)
    replace_method(the_class, 'reconstruct_n', replacement_reconstruct_n)
    replace_method(the_class, 'range_search', replacement_range_search)
    replace_method(the_class, 'update_vectors', replacement_update_vectors,
                   ignore_missing=True)
    replace_method(the_class, 'search_and_reconstruct',
                   replacement_search_and_reconstruct, ignore_missing=True)
    replace_method(the_class, 'sa_encode', replacement_sa_encode)
    replace_method(the_class, 'sa_decode', replacement_sa_decode)
    replace_method(the_class, 'add_sa_codes', replacement_add_sa_codes,
                   ignore_missing=True)

    # get/set state for pickle
    # the data is serialized to std::vector -> numpy array -> python bytes
    # so not very efficient for now.

    def index_getstate(self):
        """Return the pickle state: the serialized index as bytes under "this"."""
        return {"this": serialize_index(self).tobytes()}

    def index_setstate(self, st):
        """Restore the index from a state produced by `index_getstate`.

        NOTE(review): this runs `deserialize_index` on the pickled payload;
        as with any pickle, only load data from a trusted source.
        """
        index2 = deserialize_index(np.frombuffer(st["this"], dtype="uint8"))
        # Take over the wrapped object of the freshly deserialized index.
        self.this = index2.this

    the_class.__getstate__ = index_getstate
    the_class.__setstate__ = index_setstate