def rename_duplicated_columns()

in awswrangler/catalog/_utils.py [0:0]


def rename_duplicated_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Append an incremental number to duplicate column names to conform with Amazon Athena.

    Within one pass, the first occurrence of a name is kept as-is and the
    n-th repeat (n >= 1) becomes ``<name>_<n>``. If a generated name collides
    with a column that is already present, another pass is run over the new
    names, so the colliding column receives an additional ``_n`` suffix. Passes
    repeat until every column name is unique.

    Note
    ----
    This transformation will run `inplace` and will make changes to the original DataFrame.

    Note
    ----
    Also handles potential new column duplicate conflicts by appending an additional `_n`.

    Parameters
    ----------
    df
        Original Pandas DataFrame.

    Returns
    -------
    pd.DataFrame
        The same DataFrame object that was passed in, with duplicated column
        names renamed. It is returned untouched when the names are already unique.

    Examples
    --------
    >>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [4, 6]})
    >>> df.columns = ['a', 'a', 'a_1']
    >>> wr.catalog.rename_duplicated_columns(df=df)
       a  a_1  a_1_1
    0  1    3      4
    1  2    4      6
    """
    original_names = list(df.columns)
    new_names = original_names

    # Resolve names on a plain list, one counting scan per pass, and loop
    # (rather than recurse) so long collision chains cannot exhaust the stack.
    while len(set(new_names)) != len(new_names):
        occurrences = {}
        deduplicated = []
        for name in new_names:
            seen = occurrences.get(name, 0)
            occurrences[name] = seen + 1
            # The first occurrence keeps its name; later ones get "_<n>".
            deduplicated.append((name + f"_{seen}") if seen > 0 else name)
        new_names = deduplicated

    if new_names is original_names:
        # No pass ran: the names were already unique, nothing to rename.
        return df

    # Positional rename: relies on pandas invoking the callable once per
    # column, in column order, so each column receives its computed name.
    replacements = iter(new_names)
    df.rename(columns=lambda _: next(replacements), inplace=True)
    return df