def dataframe_chunk_generator()

in src/alpaca_eval/utils.py [0:0]


def dataframe_chunk_generator(df: pd.DataFrame, chunksize: Optional[int] = None, tqdm_desc: Optional[str] = None):
    """Generator that yields consecutive row-wise chunks of a dataframe.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to split into chunks.

    chunksize : int, optional
        The maximum number of rows per chunk. If None, the whole dataframe is yielded as a single chunk.

    tqdm_desc : str, optional
        Description to display in the tqdm progress bar. If None, no progress bar will be displayed.

    Yields
    ------
    pd.DataFrame
        Successive positional slices of `df` with at most `chunksize` rows each; the last chunk may be shorter.
        When there is more than one chunk, each chunk is a copy rather than a slice of `df`.
    """
    if chunksize is None:
        # max(1, ...) keeps the range step valid for an empty dataframe
        chunksize = max(1, len(df))

    chunk_starts = range(0, len(df), chunksize)

    # len() of the range is the true number of chunks, including a trailing partial one
    # (a floor division `len(df) // chunksize` would miss it, e.g. 5 rows with chunksize=3).
    needs_copy = len(chunk_starts) > 1

    iterator = chunk_starts
    if tqdm_desc is not None:
        iterator = tqdm.tqdm(chunk_starts, desc=tqdm_desc)

    for start in iterator:
        df_chunk = df.iloc[start : start + chunksize]

        # if many iterations then better to copy the dataframe to avoid memory issues
        if needs_copy:
            df_chunk = df_chunk.copy()

        yield df_chunk