in paq/paq_utils.py [0:0]
def parse_vectors_from_directory_memory_friendly(embeddings_dir, size=None):
    """Load all vector shards found in ``embeddings_dir`` into one 2-D tensor.

    The shards are copied one at a time into a single preallocated buffer
    instead of being collected and concatenated, so at most one shard is held
    in memory alongside the output.

    Args:
        embeddings_dir: Directory handed to
            ``get_vectors_file_paths_in_vector_directory`` to list the shard
            files.
        size: Total number of rows across all shards. When ``None`` every
            shard is loaded once up front just to sum the row counts (this
            costs an extra full pass over the files).

    Returns:
        A tensor of shape ``(size, dim)`` where ``dim`` is the second
        dimension of the first shard. The buffer is created with
        ``torch.zeros`` without an explicit dtype, so shard values are cast to
        torch's default dtype on copy.

    Raises:
        ValueError: If no shard files are found.
        AssertionError: If a shard's numeric filename suffix does not match
            its position in the listing, or if the total number of rows copied
            differs from ``size``.
    """
    paths = get_vectors_file_paths_in_vector_directory(embeddings_dir)
    if len(paths) == 0:
        # Without at least one shard the vector dimension is unknown, so no
        # output buffer can be built; fail with a clear message rather than
        # crashing later on `None.shape`.
        raise ValueError(f'No vector files found in {embeddings_dir}')

    if size is None:
        size = 0
        for j, p in enumerate(paths):
            logger.info(f'Loading vectors from {p} ({j+1} / {len(paths)}) to find total num vectors')
            # Do not keep a reference to the loaded shard: only its row count
            # is needed, and holding it would inflate peak memory when the
            # output buffer is allocated below.
            size += torch.load(p).shape[0]

    out = None
    offset = 0
    for j, p in enumerate(paths):
        logger.info(f'Loading vectors from {p} ({j+1} / {len(paths)})')
        # Shard files are expected to end in ".<index>" and to be listed in
        # index order; check before loading so a bad listing fails fast.
        assert int(p.split('.')[-1]) == j, (p, j)
        m = torch.load(p)
        if out is None:
            # Allocate once, using the first shard to discover the vector dim.
            out = torch.zeros(size, m.shape[1])
        num_rows = m.shape[0]
        out[offset: offset + num_rows] = m
        offset += num_rows
        # Release the shard before the next one is loaded.
        del m

    assert offset == size, (offset, size)
    logger.info(f'loaded index of shape {out.shape}')
    return out