def read_sparse_matrix()

in hypernymysuite/reader.py [0:0]


def read_sparse_matrix(filename, allow_binary_cache=False, same_vocab=False):
    """
    Reads in a 3 column file as a sparse matrix, where each line (x, y, v)
    gives the name of the row x, column y, and the value v.

    If filename ends with .gz, will assume the file is gzip compressed.

    Args:
        filename: str. The filename containing sparse matrix in 3-col format.
        allow_binary_cache: bool. If true, caches the matrix in a pkl file
            named ``filename + ".pkl"`` for faster reads. If the cache doesn't
            exist, or is older than ``filename``, it is (re)created.
        same_vocab: bool. Indicates whether rows and columns have the same vocab.

    Returns:
        A tuple containing (spmatrix, id2row, row2id, col2id):
            spmatrix: a scipy.sparse matrix with the entries
            id2row: a list[str] containing the names for the rows of the matrix
            row2id: a dict[str,int] mapping words to row indices
            col2id: a dict[str,int] mapping words to col indices. If same_vocab,
                this is identical to row2id.
        When the binary cache is used, the return value is whatever object
        was previously pickled into the cache file.
    """
    # Single source of truth for the cache location: the freshness check, the
    # read and the write must all refer to the same path.
    cache_filename = filename + ".pkl"

    # make sure the cache is new enough: it is only trusted when its mtime is
    # at least as recent as the source file's.
    cache_exists = os.path.exists(cache_filename)
    cache_fresh = cache_exists and os.path.getmtime(filename) <= os.path.getmtime(
        cache_filename
    )

    if allow_binary_cache and cache_fresh:
        logging.debug("Using space cache {}".format(cache_filename))
        # NOTE: unpickling can execute arbitrary code; only enable the binary
        # cache for files in locations you trust.
        with open(cache_filename, "rb") as pklf:
            return pickle.load(pklf)

    # binary cache is not allowed, or it's missing/stale
    result = __load_sparse_matrix(filename, same_vocab=same_vocab)
    if allow_binary_cache:
        logging.warning("Dumping the binary cache {}".format(cache_filename))
        with open(cache_filename, "wb") as pklf:
            pickle.dump(result, pklf)
    return result