# def __load_sparse_matrix()
#
# in hypernymysuite/reader.py [0:0]


def __load_sparse_matrix(filename, same_vocab):
    """
    Actual workhorse for loading a sparse matrix. See docstring for
    read_sparse_matrix.

    Each line of the file is decoded as UTF-8 and handed to
    ``__try_three_columns``, which is expected to yield a
    ``(target, context, weight)`` triple. Targets index the rows of the
    matrix and contexts index the columns.

    Args:
        filename: Path of the file to read. A name ending in ".gz" is opened
            with ``gzip``; anything else is opened as a plain binary file.
        same_vocab: If true, rows and columns share one vocabulary (the
            returned ``rowvocab`` and ``colvocab`` are the same dict object).
            Otherwise the column vocabulary starts out empty and is built
            independently of the row vocabulary.

    Returns:
        A tuple ``(spmatrix, objects, rowvocab, colvocab)``:
            spmatrix: ``scipy.sparse.csr_matrix`` of dtype float64 with shape
                ``(len(rowvocab), len(colvocab))``.
            objects: list of row labels, where ``objects[i]`` is the label
                whose id in ``rowvocab`` is ``i``; index 0 is "<OOV>".
            rowvocab: dict mapping row label -> row index.
            colvocab: dict mapping column label -> column index.
    """
    # Index 0 of the row vocabulary is reserved for the out-of-vocabulary token.
    objects = ["<OOV>"]
    rowvocab = {"<OOV>": 0}
    # With a shared vocabulary both names alias the same dict, so any label
    # seen as a context also gets a row id (and vice versa).
    colvocab = rowvocab if same_vocab else {}

    # Parallel coordinate lists: row indices, column indices, values.
    _is = []
    _js = []
    _vs = []

    # Both openers yield bytes lines; decoding happens per line below.
    opener = gzip.open if filename.endswith(".gz") else open

    # The context manager guarantees the handle is closed even if decoding or
    # parsing a line raises part-way through the file.
    with opener(filename, "rb") as f:
        for line in f:
            line = line.decode("utf-8")
            target, context, weight = __try_three_columns(line)
            if target not in rowvocab:
                rowvocab[target] = len(rowvocab)
                objects.append(target)
            if context not in colvocab:
                colvocab[context] = len(colvocab)
                if same_vocab:
                    # colvocab is rowvocab here, so the new id is a row id
                    # too; keep ``objects`` aligned with it.
                    objects.append(context)

            _is.append(rowvocab[target])
            _js.append(colvocab[context])
            _vs.append(weight)

    _shape = (len(rowvocab), len(colvocab))
    # Built from coordinate triples; SciPy sums values that share the same
    # (row, column) position.
    spmatrix = sp.csr_matrix((_vs, (_is, _js)), shape=_shape, dtype=np.float64)
    return spmatrix, objects, rowvocab, colvocab