in lib/data.py [0:0]
def load_bigann(device, size = 10 ** 6, test=True, qsize=10 ** 5):
    """Load the BigANN learn / base / query sets plus ground truth.

    Two modes are supported:

    * ``test=True``: the base and query vectors are memory-mapped from
      ``bigann_base.bvecs`` and ``bigann_query.bvecs``. The base set is cut
      to ``int(size / 10 ** 6)`` million rows and the ground truth is read
      from the matching ``gnd/idx_<N>M.ivecs`` file. Note that ``size`` is
      therefore rounded down to a whole number of millions in this mode.
    * ``test=False``: everything is carved out of ``bigann_learn.bvecs``:
      the first ``size`` rows become the base set, the next ``qsize`` rows
      the queries, and the remainder is returned as the learn set. The
      ground truth is computed with ``get_nearestneighbors``.

    Args:
        device: Passed through to ``get_nearestneighbors`` (only used when
            ``test`` is false).
        size: Requested number of base vectors.
        test: Selects between the two modes described above.
        qsize: Number of query vectors taken from the learn file when
            ``test`` is false; ignored otherwise.

    Returns:
        The tuple ``(xt, xb, xq, gt)``: learn vectors, sanitized base
        vectors, sanitized query vectors and ground truth.
    """
    root = getBasedir("bigann")
    millions = int(size / 10 ** 6)
    learn = mmap_bvecs(join(root, 'bigann_learn.bvecs'))

    if not test:
        # Validation-style split: base, queries and the remaining learn
        # vectors are consecutive, non-overlapping slices of the learn file.
        split = size + qsize
        base, queries, learn = learn[:size], learn[size:split], learn[split:]
        base = sanitize(base)
        queries = sanitize(queries)
        # 100 is presumably the number of neighbours per query -- confirm
        # against get_nearestneighbors.
        truth = get_nearestneighbors(queries, base, 100, device)
        return learn, base, queries, truth

    base = mmap_bvecs(join(root, 'bigann_base.bvecs'))
    queries = mmap_bvecs(join(root, 'bigann_query.bvecs'))
    # Keep only the first `millions` million base vectors so the base set
    # matches the ground-truth file selected below.
    base = base[:millions * 1000 * 1000]
    truth = ivecs_read(join(root, 'gnd/idx_%dM.ivecs' % millions))
    return learn, sanitize(base), sanitize(queries), truth