def LoadDataNLI()

in source/nli.py [0:0]


def _load_unit_embeddings(fn, dim):
    """Read raw float32 values from `fn` as an L2-normalized (n, dim) array."""
    emb = np.fromfile(fn, dtype=np.float32, count=-1)
    n_rows, leftover = divmod(emb.size, dim)
    if leftover:
        # A size that is not a multiple of `dim` means the file or `dim` is
        # wrong; truncating silently would yield misaligned sentence vectors.
        raise ValueError(
            '{:s}: {:d} float32 values is not a multiple of dim={:d}'
            .format(fn, emb.size, dim))
    emb = emb.reshape(n_rows, dim)
    faiss.normalize_L2(emb)  # normalizes the rows in place
    return emb


def LoadDataNLI(fn1, fn2, fn_lbl,
                dim=1024, bsize=32,
                fraction=1.0,
                shuffle=False, quiet=False):
    """Build a DataLoader of NLI features from two embedding files and labels.

    Both embedding files are read as raw float32 values, viewed as rows of
    `dim` values and L2-normalized. For each pair of rows (x, y) the feature
    vector is the concatenation [x, y, |x - y|, x * y], i.e. 4 * dim values.

    Args:
        fn1: path of the raw float32 file with the first set of embeddings
            (reported as premises in the progress message).
        fn2: path of the raw float32 file with the second set of embeddings
            (reported as hypotheses in the progress message).
        fn_lbl: path of a text file with the integer labels, read with
            np.loadtxt as int32.
        dim: number of float32 values per embedding.
        bsize: batch size handed to the DataLoader.
        fraction: if < 1.0, only the first int(n * fraction) examples are kept.
        shuffle: passed through to the DataLoader.
        quiet: if True, suppress the progress messages.

    Returns:
        A DataLoader over a TensorDataset of (features, labels), where
        features is a float32 tensor with 4 * dim columns and labels is the
        int32 label tensor as loaded (1-D for a one-label-per-line file).

    Raises:
        ValueError: if an embedding file does not hold a whole number of
            `dim`-sized rows, or if the two embedding files and the label
            file do not contain the same number of examples.
    """
    x = _load_unit_embeddings(fn1, dim)
    y = _load_unit_embeddings(fn2, dim)

    # ndmin=1 keeps a one-line label file as a length-1 vector instead of a
    # 0-d scalar. Labels are deliberately left as loaded: the former
    # `lbl.reshape(...)` call discarded its result and never changed the shape.
    lbl = np.loadtxt(fn_lbl, dtype=np.int32, ndmin=1)

    if not quiet:
        print(' - read {:d}x{:d} elements in {:s}'.format(x.shape[0], x.shape[1], fn1))
        print(' - read {:d}x{:d} elements in {:s}'.format(y.shape[0], y.shape[1], fn2))
        print(' - read {:d} labels [{:d},{:d}] in {:s}'
              .format(lbl.shape[0], lbl.min(), lbl.max(), fn_lbl))

    # Check alignment before truncating: slicing to the first N rows below
    # would otherwise hide a mismatch between the three inputs.
    if not (x.shape[0] == y.shape[0] == lbl.shape[0]):
        raise ValueError(
            'mismatched number of examples: {:d} in {:s}, {:d} in {:s}, '
            '{:d} labels in {:s}'
            .format(x.shape[0], fn1, y.shape[0], fn2, lbl.shape[0], fn_lbl))

    if fraction < 1.0:
        N = int(x.shape[0] * fraction)
        if not quiet:
            print(' - using only the first {:d} examples'.format(N))
        x = x[:N]
        y = y[:N]
        lbl = lbl[:N]

    if not quiet:
        print(' - combine premises and hyps')
    nli = np.concatenate((x, y, np.absolute(x - y), np.multiply(x, y)), axis=1)

    D = data_utils.TensorDataset(torch.from_numpy(nli), torch.from_numpy(lbl))
    loader = data_utils.DataLoader(D, batch_size=bsize, shuffle=shuffle)
    return loader