in awswrangler/catalog/_utils.py [0:0]
def rename_duplicated_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Append an incremental number to duplicate column names to conform with Amazon Athena.

    Within one pass the first occurrence of a label is kept as-is and its n-th
    repeat (n >= 1) becomes ``f"{label}_{n}"``. Passes are repeated until every
    label is distinct, so a generated name that collides with an existing
    column (e.g. a second ``a`` turning into an already present ``a_1``)
    receives an additional ``_n`` suffix.

    Note
    ----
    This transformation will run `inplace` and will make changes to the original DataFrame.

    Note
    ----
    Also handles potential new column duplicate conflicts by appending an additional `_n`.

    Note
    ----
    Labels that are not strings are accepted: the first occurrence keeps its original
    label, while renamed repeats become strings (e.g. ``0`` -> ``"0_1"``).

    Parameters
    ----------
    df
        Original Pandas DataFrame.

    Returns
    -------
        The same DataFrame object, with duplicated column names renamed.

    Examples
    --------
    >>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [4, 6]})
    >>> df.columns = ['a', 'a', 'a_1']
    >>> wr.catalog.rename_duplicated_columns(df=df)
       a  a_1  a_1_1
    0  1    3      4
    1  2    4      6
    """
    # Uniqueness is decided by hashing, the same rule the `seen` dict applies
    # below, so every pass that starts renames at least one column.
    while len(set(df.columns)) != len(df.columns):
        seen = {}
        renamed = []
        for label in df.columns:
            repeat = seen.get(label, 0)
            seen[label] = repeat + 1
            # Format the whole label instead of concatenating so that
            # non-string labels (e.g. integers) do not raise TypeError.
            renamed.append(label if repeat == 0 else f"{label}_{repeat}")
        # The mapper ignores the label it receives and hands the precomputed
        # names out positionally; the iterator is bound as a default argument
        # so the lambda does not close over a variable reassigned by the loop.
        df.rename(columns=lambda _, names=iter(renamed): next(names), inplace=True)
    return df