in hypernymysuite/reader.py [0:0]
def read_sparse_matrix(filename, allow_binary_cache=False, same_vocab=False):
    """
    Reads in a 3 column file as a sparse matrix, where each line (x, y, v)
    gives the name of the row x, column y, and the value v.

    If filename ends with .gz, will assume the file is gzip compressed.

    Args:
        filename: str. The filename containing sparse matrix in 3-col format.
        allow_binary_cache: bool. If true, caches the matrix in a pkl file
            named ``filename + ".pkl"`` for faster reads. If the cache
            doesn't exist, or is older than ``filename``, it is (re)created.
        same_vocab: bool. Indicates whether rows and columns have the same
            vocab.

    Returns:
        A tuple containing (spmatrix, id2row, row2id, col2id):
            spmatrix: a scipy.sparse matrix with the entries
            id2row: a list[str] containing the names for the rows of the
                matrix
            row2id: a dict[str,int] mapping words to row indices
            col2id: a dict[str,int] mapping words to col indices. If
                same_vocab, this is identical to row2id.

    Note:
        The binary cache is read with ``pickle``, which can execute arbitrary
        code while loading. Only enable ``allow_binary_cache`` for files in
        locations you trust.
    """
    # Single source of truth for the cache path: the freshness check, the
    # read and the write must all refer to the same file.
    cache_filename = filename + ".pkl"

    # The cache is usable only if it exists and is at least as new as the
    # source file. getmtime(filename) is evaluated only when the cache exists.
    cache_exists = os.path.exists(cache_filename)
    cache_fresh = cache_exists and (
        os.path.getmtime(filename) <= os.path.getmtime(cache_filename)
    )

    if allow_binary_cache and cache_fresh:
        logging.debug("Using space cache {}".format(cache_filename))
        with open(cache_filename, "rb") as pklf:
            return pickle.load(pklf)

    # Binary cache is not allowed, or it is missing/stale: parse the source.
    result = __load_sparse_matrix(filename, same_vocab=same_vocab)
    if allow_binary_cache:
        logging.warning("Dumping the binary cache {}".format(cache_filename))
        with open(cache_filename, "wb") as pklf:
            pickle.dump(result, pklf)
    return result