in faiss/python/__init__.py [0:0]
def handle_Index(the_class):
    """Install numpy-friendly method wrappers and pickle hooks on `the_class`.

    The `replacement_*` functions defined in this body are registered on
    `the_class` through `replace_method`, and `__getstate__` /
    `__setstate__` are attached so that instances round-trip through
    `serialize_index` / `deserialize_index`.

    NOTE(review): every wrapper calls `self.<name>_c(...)`, so
    `replace_method` presumably keeps the original SWIG method under that
    name -- confirm against its definition.

    Parameters
    ----------
    the_class : type
        Index class whose methods are replaced in place.
    """
def replacement_add(self, x):
    """Adds vectors to the index.

    The index must be trained before vectors can be added to it.
    The vectors are implicitly numbered in sequence. When `n` vectors are
    added to the index, they are given ids `ntotal`, `ntotal + 1`, ...,
    `ntotal + n - 1`.

    Parameters
    ----------
    x : array_like
        Vectors to add, shape (n, d) where d is appropriate for the index.
        Converted to a C-contiguous float32 array when it is not one
        already.

    Raises
    ------
    AssertionError
        If the second dimension of `x` differs from `self.d`.
    """
    # Accept any array_like; no copy is made for contiguous float32 input.
    x = np.ascontiguousarray(x, dtype=np.float32)
    n, d = x.shape
    assert d == self.d
    self.add_c(n, swig_ptr(x))
def replacement_add_with_ids(self, x, ids):
    """Adds vectors with arbitrary ids to the index (not all indexes support this).

    The index must be trained before vectors can be added to it.
    Vector `i` is stored in `x[i]` and has id `ids[i]`.

    Parameters
    ----------
    x : array_like
        Vectors to add, shape (n, d) where d is appropriate for the index.
        Converted to a C-contiguous float32 array when it is not one
        already.
    ids : array_like
        Array of ids of size n, converted to `int64` when needed. Note that
        `-1` is reserved in result lists to mean "not found" so it's better
        to not use it as an id.

    Raises
    ------
    AssertionError
        If `x` has the wrong dimension or `ids` does not hold exactly one
        id per vector.
    """
    # Normalise both inputs up front so the shape checks below see arrays.
    x = np.ascontiguousarray(x, dtype=np.float32)
    ids = np.ascontiguousarray(ids, dtype=np.int64)
    n, d = x.shape
    assert d == self.d
    assert ids.shape == (n, ), 'not same nb of vectors as ids'
    self.add_with_ids_c(n, swig_ptr(x), swig_ptr(ids))
def replacement_assign(self, x, k, labels=None):
    """Find the k nearest neighbors of the set of vectors x in the index.

    This is the same as the `search` method, but discards the distances.

    Parameters
    ----------
    x : array_like
        Query vectors, shape (n, d) where d is appropriate for the index.
        Converted to a C-contiguous float32 array when it is not one
        already.
    k : int
        Number of nearest neighbors.
    labels : array_like, optional
        Pre-allocated array of shape (n, k) to store the results. When
        omitted, an int64 array is allocated.

    Returns
    -------
    labels : array_like
        Labels of the nearest neighbors, shape (n, k).
        When not enough results are found, the label is set to -1

    Raises
    ------
    AssertionError
        If `x` has the wrong dimension or `labels` has the wrong shape.
    """
    # Accept any array_like; no copy is made for contiguous float32 input.
    x = np.ascontiguousarray(x, dtype=np.float32)
    n, d = x.shape
    assert d == self.d

    if labels is None:
        labels = np.empty((n, k), dtype=np.int64)
    else:
        assert labels.shape == (n, k)

    self.assign_c(n, swig_ptr(x), swig_ptr(labels), k)
    return labels
def replacement_train(self, x):
    """Trains the index on a representative set of vectors.

    The index must be trained before vectors can be added to it.

    Parameters
    ----------
    x : array_like
        Training vectors, shape (n, d) where d is appropriate for the index.
        Converted to a C-contiguous float32 array when it is not one
        already.

    Raises
    ------
    AssertionError
        If the second dimension of `x` differs from `self.d`.
    """
    # Accept any array_like; no copy is made for contiguous float32 input.
    x = np.ascontiguousarray(x, dtype=np.float32)
    n, d = x.shape
    assert d == self.d
    self.train_c(n, swig_ptr(x))
def replacement_search(self, x, k, D=None, I=None):
    """Find the k nearest neighbors of the set of vectors x in the index.

    Parameters
    ----------
    x : array_like
        Query vectors, shape (n, d) where d is appropriate for the index.
        Converted to a C-contiguous float32 array when it is not one
        already.
    k : int
        Number of nearest neighbors, must be strictly positive.
    D : array_like, optional
        Pre-allocated array of shape (n, k) to store the distances. When
        omitted, a float32 array is allocated.
    I : array_like, optional
        Pre-allocated array of shape (n, k) to store the labels. When
        omitted, an int64 array is allocated.

    Returns
    -------
    D : array_like
        Distances of the nearest neighbors, shape (n, k). When not enough
        results are found the distance is set to +Inf or -Inf.
    I : array_like
        Labels of the nearest neighbors, shape (n, k).
        When not enough results are found, the label is set to -1

    Raises
    ------
    AssertionError
        If `x` has the wrong dimension, `k` is not positive, or a supplied
        output array has the wrong shape.
    """
    # Accept any array_like; no copy is made for contiguous float32 input.
    x = np.ascontiguousarray(x, dtype=np.float32)
    n, d = x.shape
    assert d == self.d
    assert k > 0

    if D is None:
        D = np.empty((n, k), dtype=np.float32)
    else:
        assert D.shape == (n, k)

    if I is None:
        I = np.empty((n, k), dtype=np.int64)
    else:
        assert I.shape == (n, k)

    self.search_c(n, swig_ptr(x), k, swig_ptr(D), swig_ptr(I))
    return D, I
def replacement_search_and_reconstruct(self, x, k, D=None, I=None, R=None):
    """Find the k nearest neighbors of the set of vectors x in the index,
    and return an approximation of these vectors.

    Parameters
    ----------
    x : array_like
        Query vectors, shape (n, d) where d is appropriate for the index.
        Converted to a C-contiguous float32 array when it is not one
        already.
    k : int
        Number of nearest neighbors, must be strictly positive.
    D : array_like, optional
        Pre-allocated array of shape (n, k) to store the distances. When
        omitted, a float32 array is allocated.
    I : array_like, optional
        Pre-allocated array of shape (n, k) to store the labels. When
        omitted, an int64 array is allocated.
    R : array_like, optional
        Pre-allocated array of shape (n, k, d) to store the reconstructed
        vectors. When omitted, a float32 array is allocated.

    Returns
    -------
    D : array_like
        Distances of the nearest neighbors, shape (n, k). When not enough
        results are found the distance is set to +Inf or -Inf.
    I : array_like
        Labels of the nearest neighbors, shape (n, k). When not enough
        results are found, the label is set to -1
    R : array_like
        Approximate (reconstructed) nearest neighbor vectors, shape (n, k, d).

    Raises
    ------
    AssertionError
        If `x` has the wrong dimension, `k` is not positive, or a supplied
        output array has the wrong shape.
    """
    # Accept any array_like; no copy is made for contiguous float32 input.
    x = np.ascontiguousarray(x, dtype=np.float32)
    n, d = x.shape
    assert d == self.d
    assert k > 0

    if D is None:
        D = np.empty((n, k), dtype=np.float32)
    else:
        assert D.shape == (n, k)

    if I is None:
        I = np.empty((n, k), dtype=np.int64)
    else:
        assert I.shape == (n, k)

    if R is None:
        R = np.empty((n, k, d), dtype=np.float32)
    else:
        assert R.shape == (n, k, d)

    self.search_and_reconstruct_c(
        n, swig_ptr(x), k, swig_ptr(D), swig_ptr(I), swig_ptr(R)
    )
    return D, I, R
def replacement_remove_ids(self, x):
    """Remove some ids from the index.

    This is a O(ntotal) operation by default, so could be expensive.

    Parameters
    ----------
    x : array_like or faiss.IDSelector
        Either an IDSelector that returns True for vectors to remove, or a
        list of ids to remove (1D, converted to a contiguous int64 array
        when needed). When `x` is a list, it is wrapped into an IDSelector.

    Returns
    -------
    n_remove: int
        number of vectors that were removed

    Raises
    ------
    AssertionError
        If `x` is not an IDSelector and is not one-dimensional.
    """
    if isinstance(x, IDSelector):
        sel = x
    else:
        # `x` stays bound until the call below returns, so the buffer whose
        # pointer is handed to the selector outlives the removal.
        x = np.ascontiguousarray(x, dtype=np.int64)
        assert x.ndim == 1
        index_ivf = try_extract_index_ivf(self)
        # An IVF index whose direct map is a hashtable gets an
        # IDSelectorArray; every other case uses IDSelectorBatch.
        if index_ivf and index_ivf.direct_map.type == DirectMap.Hashtable:
            sel = IDSelectorArray(x.size, swig_ptr(x))
        else:
            sel = IDSelectorBatch(x.size, swig_ptr(x))
    return self.remove_ids_c(sel)
def replacement_reconstruct(self, key, x=None):
    """Approximate reconstruction of one vector from the index.

    Parameters
    ----------
    key : int
        Id of the vector to reconstruct
    x : array_like, optional
        pre-allocated array of shape (`self.d`,) to store the result

    Returns
    -------
    x : array_like
        Reconstructed vector, size `self.d`, `dtype`=float32
    """
    if x is not None:
        # A caller-supplied buffer must hold exactly one vector.
        assert x.shape == (self.d, )
    else:
        x = np.empty(self.d, dtype=np.float32)

    self.reconstruct_c(key, swig_ptr(x))
    return x
def replacement_reconstruct_n(self, n0=0, ni=-1, x=None):
    """Approximate reconstruction of vectors `n0` ... `n0 + ni - 1` from the index.

    Missing vectors trigger an exception.

    Parameters
    ----------
    n0 : int, optional
        Id of the first vector to reconstruct (default 0)
    ni : int, optional
        Number of vectors to reconstruct. The default, -1, means every
        vector from `n0` through `self.ntotal - 1`.
    x : array_like, optional
        pre-allocated array of shape (`ni`, `self.d`) to store the results

    Returns
    -------
    x : array_like
        Reconstructed vectors, size (`ni`, `self.d`), `dtype`=float32

    Raises
    ------
    AssertionError
        If a supplied `x` does not have shape (`ni`, `self.d`).
    """
    if ni == -1:
        # Sentinel: reconstruct everything from n0 to the end of the index.
        ni = self.ntotal - n0

    if x is None:
        x = np.empty((ni, self.d), dtype=np.float32)
    else:
        assert x.shape == (ni, self.d)

    self.reconstruct_n_c(n0, ni, swig_ptr(x))
    return x
def replacement_update_vectors(self, keys, x):
    """Pass `n` keys and their `n` vectors to `update_vectors_c`.

    NOTE(review): presumably this overwrites the vector stored under each
    key -- confirm against the C++ documentation of `update_vectors`.

    Parameters
    ----------
    keys : array_like
        Ids of the vectors to update, shape (n,). Converted to a contiguous
        int64 array when needed.
    x : array_like
        Vectors, shape (n, d) with d equal to `self.d`. Converted to a
        C-contiguous float32 array when it is not one already.

    Raises
    ------
    AssertionError
        If `keys` is not one-dimensional or `x` is not of shape (n, d).
    """
    # Normalise both inputs up front so the shape checks below see arrays.
    keys = np.ascontiguousarray(keys, dtype=np.int64)
    x = np.ascontiguousarray(x, dtype=np.float32)
    n = keys.size
    assert keys.shape == (n, )
    assert x.shape == (n, self.d)
    self.update_vectors_c(n, swig_ptr(keys), swig_ptr(x))
# The CPU does not support passed-in output buffers
def replacement_range_search(self, x, thresh):
    """Search vectors that are within a distance of the query vectors.

    Parameters
    ----------
    x : array_like
        Query vectors, shape (n, d) where d is appropriate for the index.
        Converted to a C-contiguous float32 array when it is not one
        already.
    thresh : float
        Threshold to select neighbors. All elements within this radius are
        returned, except for maximum inner product indexes, where the
        elements above the threshold are returned

    Returns
    -------
    lims: array_like
        Starting index of the results for each query vector, size n+1.
    D : array_like
        Distances of the nearest neighbors, shape `lims[n]`. The distances
        for query i are in `D[lims[i]:lims[i+1]]`.
    I : array_like
        Labels of nearest neighbors, shape `lims[n]`. The labels for query i
        are in `I[lims[i]:lims[i+1]]`.

    Raises
    ------
    AssertionError
        If the second dimension of `x` differs from `self.d`.
    """
    # Accept any array_like; no copy is made for contiguous float32 input.
    x = np.ascontiguousarray(x, dtype=np.float32)
    n, d = x.shape
    assert d == self.d

    res = RangeSearchResult(n)
    self.range_search_c(n, swig_ptr(x), thresh, res)

    # Wrap the result pointers as arrays and copy them; presumably the
    # underlying buffers belong to `res`, which goes away on return.
    lims = rev_swig_ptr(res.lims, n + 1).copy()
    nd = int(lims[-1])  # total number of results over all queries
    D = rev_swig_ptr(res.distances, nd).copy()
    I = rev_swig_ptr(res.labels, nd).copy()
    return lims, D, I
def replacement_sa_encode(self, x, codes=None):
    """Encode vectors into fixed-size byte codes through `sa_encode_c`.

    Parameters
    ----------
    x : array_like
        Vectors to encode, shape (n, d) with d equal to `self.d`. Converted
        to a C-contiguous float32 array when it is not one already.
    codes : array_like, optional
        Pre-allocated array of shape (n, `self.sa_code_size()`) to store
        the codes. When omitted, a uint8 array is allocated.

    Returns
    -------
    codes : array_like
        The codes, shape (n, `self.sa_code_size()`).

    Raises
    ------
    AssertionError
        If `x` has the wrong dimension or `codes` has the wrong shape.
    """
    # Accept any array_like; no copy is made for contiguous float32 input.
    x = np.ascontiguousarray(x, dtype=np.float32)
    n, d = x.shape
    assert d == self.d

    code_size = self.sa_code_size()
    if codes is None:
        codes = np.empty((n, code_size), dtype=np.uint8)
    else:
        assert codes.shape == (n, code_size)

    self.sa_encode_c(n, swig_ptr(x), swig_ptr(codes))
    return codes
def replacement_sa_decode(self, codes, x=None):
    """Decode fixed-size byte codes back into vectors through `sa_decode_c`.

    Parameters
    ----------
    codes : array_like
        Codes to decode, shape (n, `self.sa_code_size()`). Made
        C-contiguous when needed; the dtype is not converted
        (`sa_encode` produces uint8 codes).
    x : array_like, optional
        Pre-allocated array of shape (n, `self.d`) to store the decoded
        vectors. When omitted, a float32 array is allocated.

    Returns
    -------
    x : array_like
        Decoded vectors, shape (n, `self.d`).

    Raises
    ------
    AssertionError
        If `codes` has the wrong code size or `x` has the wrong shape.
    """
    # Only contiguity is normalised: silently casting codes to another
    # dtype could hide a caller bug.
    codes = np.ascontiguousarray(codes)
    n, cs = codes.shape
    assert cs == self.sa_code_size()

    if x is None:
        x = np.empty((n, self.d), dtype=np.float32)
    else:
        assert x.shape == (n, self.d)

    self.sa_decode_c(n, swig_ptr(codes), swig_ptr(x))
    return x
def replacement_add_sa_codes(self, codes, ids=None):
    """Pass pre-computed codes, optionally with ids, to `add_sa_codes_c`.

    Parameters
    ----------
    codes : array_like
        Codes to add, shape (n, `self.sa_code_size()`). Made C-contiguous
        when needed; the dtype is not converted (`sa_encode` produces
        uint8 codes).
    ids : array_like, optional
        Ids for the codes, shape (n,), converted to a contiguous int64
        array when needed. When omitted, `None` is passed through.

    Raises
    ------
    AssertionError
        If `codes` has the wrong code size or `ids` has the wrong shape.
    """
    codes = np.ascontiguousarray(codes)
    n, cs = codes.shape
    assert cs == self.sa_code_size()

    ids_ptr = None
    if ids is not None:
        ids = np.ascontiguousarray(ids, dtype=np.int64)
        assert ids.shape == (n,)
        # Keep `ids` bound to the array: the pointer gets its own name so a
        # freshly converted copy is not freed before the call below.
        ids_ptr = swig_ptr(ids)

    self.add_sa_codes_c(n, swig_ptr(codes), ids_ptr)
# Registration table: (method name, wrapper, extra keyword arguments).
# Entries flagged with `ignore_missing=True` are presumably methods that
# not every index class exposes -- confirm against `replace_method`.
always = {}
optional = {'ignore_missing': True}
registrations = (
    ('add', replacement_add, always),
    ('add_with_ids', replacement_add_with_ids, always),
    ('assign', replacement_assign, always),
    ('train', replacement_train, always),
    ('search', replacement_search, always),
    ('remove_ids', replacement_remove_ids, always),
    ('reconstruct', replacement_reconstruct, always),
    ('reconstruct_n', replacement_reconstruct_n, always),
    ('range_search', replacement_range_search, always),
    ('update_vectors', replacement_update_vectors, optional),
    ('search_and_reconstruct', replacement_search_and_reconstruct, optional),
    ('sa_encode', replacement_sa_encode, always),
    ('sa_decode', replacement_sa_decode, always),
    ('add_sa_codes', replacement_add_sa_codes, optional),
)
# Same calls, in the same order, as the one-call-per-method form.
for method_name, wrapper, extra in registrations:
    replace_method(the_class, method_name, wrapper, **extra)
# get/set state for pickle
# the data is serialized to std::vector -> numpy array -> python bytes
# so not very efficient for now.
def index_getstate(self):
    """Return the pickle state: the serialized index as bytes under "this"."""
    serialized = serialize_index(self)
    return {"this": serialized.tobytes()}
def index_setstate(self, st):
    """Restore the index from a pickle state produced by `index_getstate`.

    The bytes stored under "this" are deserialized into a new index, whose
    `this` attribute is then adopted by `self`.
    """
    raw = np.frombuffer(st["this"], dtype="uint8")
    restored = deserialize_index(raw)
    self.this = restored.this
# Attach the pickle hooks to the class.
setattr(the_class, "__getstate__", index_getstate)
setattr(the_class, "__setstate__", index_setstate)