in source/nli.py [0:0]
def _read_embeddings(fname, dim):
    """Read raw float32 values from *fname* as L2-normalised rows of width *dim*."""
    emb = np.fromfile(fname, dtype=np.float32, count=-1)
    # In-place resize to (rows, dim); values beyond rows * dim are not kept.
    emb.resize(emb.shape[0] // dim, dim)
    faiss.normalize_L2(emb)
    return emb


def LoadDataNLI(fn1, fn2, fn_lbl,
                dim=1024, bsize=32,
                fraction=1.0,
                shuffle=False, quiet=False):
    """Build a DataLoader of combined sentence-pair features and labels.

    Two raw float32 embedding files are read, L2-normalised, and combined
    row-wise into ``[x, y, |x - y|, x * y]``, giving ``4 * dim`` features
    per example. Labels are read as int32 from a text file.

    Args:
        fn1: Path to the raw float32 embeddings of the first sentences.
        fn2: Path to the raw float32 embeddings of the second sentences.
        fn_lbl: Path to a text file of integer labels readable by
            ``np.loadtxt``.
        dim: Width of a single embedding.
        bsize: Batch size passed to the DataLoader.
        fraction: When below 1.0, only the first
            ``int(n_examples * fraction)`` examples are kept.
        shuffle: Passed through to the DataLoader.
        quiet: Suppress the progress messages when true.

    Returns:
        A DataLoader over a TensorDataset of (features, labels).
    """
    x = _read_embeddings(fn1, dim)
    y = _read_embeddings(fn2, dim)

    # ndmin=1 keeps a one-line label file as a 1-D array instead of a 0-d
    # scalar. The previous `lbl.reshape(lbl.shape[0], 1)` discarded its
    # result (reshape is not in-place), so labels were always passed on as
    # loaded; that dead statement is dropped and the label shape is unchanged.
    lbl = np.loadtxt(fn_lbl, dtype=np.int32, ndmin=1)

    if not quiet:
        print(' - read {:d}x{:d} elements in {:s}'.format(x.shape[0], x.shape[1], fn1))
        print(' - read {:d}x{:d} elements in {:s}'.format(y.shape[0], y.shape[1], fn2))
        print(' - read {:d} labels [{:d},{:d}] in {:s}'
              .format(lbl.shape[0], lbl.min(), lbl.max(), fn_lbl))

    if fraction < 1.0:
        N = int(x.shape[0] * fraction)
        if not quiet:
            print(' - using only the first {:d} examples'.format(N))
        x = x[:N]
        y = y[:N]
        lbl = lbl[:N]

    if not quiet:
        print(' - combine premises and hyps')
    nli = np.concatenate((x, y, np.absolute(x - y), np.multiply(x, y)), axis=1)

    D = data_utils.TensorDataset(torch.from_numpy(nli), torch.from_numpy(lbl))
    loader = data_utils.DataLoader(D, batch_size=bsize, shuffle=shuffle)
    return loader