def groupby_collect()

in src/rime/util/__init__.py [0:0]


def groupby_collect(series):
    """Collect the values of ``series`` into one list per index label.

    Grouping is done on runs of equal, adjacent index labels.  If any label
    is smaller than the one before it, the series is first sorted by index
    with a stable sort (and a warning is issued, because the extra sort
    costs time), so that equal labels become adjacent while the values of
    each label keep their original relative order.

    Parameters
    ----------
    series : pd.Series
        Values to group; the index supplies the group labels.

    Returns
    -------
    pd.Series
        One entry per run of equal index labels, holding that run's values
        as a Python list and indexed by the label.  An empty input yields an
        empty object-dtype series.

    >>> groupby_collect(pd.Series([1,2,3,4,5], index=[1,1,2,3,3])).to_dict()
    {1: [1, 2], 2: [3], 3: [4, 5]}
    """
    if len(series) == 0:
        # Explicit dtype keeps the empty result consistent with the
        # object-dtype (list-valued) non-empty result and avoids the
        # "default dtype for empty Series" deprecation warning.
        return pd.Series(dtype=object)

    labels = np.asarray(series.index.values)

    # Vectorised form of "some label is smaller than its predecessor".
    if np.any(labels[1:] < labels[:-1]):
        warnings.warn(
            "unsorted input to groupby_collect may be inefficient",
            stacklevel=2,  # attribute the warning to the caller
        )
        # mergesort is stable: values keep their relative order per label.
        series = series.sort_index(kind='mergesort')
        labels = np.asarray(series.index.values)

    # A new group starts wherever a label differs from the previous one.
    splits = np.flatnonzero(labels[1:] != labels[:-1]) + 1
    starts = np.concatenate(([0], splits))

    return pd.Series(
        [chunk.tolist() for chunk in np.split(series.values, splits)],
        index=series.index.values[starts],
    )