in awswrangler/_data_types.py [0:0]
def _cast_pandas_column(df: pd.DataFrame, col: str, current_type: str, desired_type: str) -> pd.DataFrame:
if desired_type == "datetime64":
df[col] = pd.to_datetime(df[col])
elif desired_type == "date":
df[col] = df[col].apply(lambda x: _cast2date(value=x)).replace(to_replace={pd.NaT: None})
elif desired_type == "bytes":
df[col] = df[col].astype("string").str.encode(encoding="utf-8").replace(to_replace={pd.NA: None})
elif desired_type == "decimal":
# First cast to string
df = _cast_pandas_column(df=df, col=col, current_type=current_type, desired_type="string")
# Then cast to decimal
df[col] = df[col].apply(lambda x: Decimal(str(x)) if str(x) not in ("", "none", "None", " ", "<NA>") else None)
else:
try:
df[col] = df[col].astype(desired_type)
except TypeError as ex:
_logger.debug("Column: %s", col)
if "object cannot be converted to an IntegerDtype" not in str(ex):
raise ex
warnings.warn(
"Object cannot be converted to an IntegerDtype. Integer columns in Python cannot contain "
"missing values. If your input data contains missing values, it will be encoded as floats"
"which may cause precision loss.",
UserWarning,
)
df[col] = (
df[col]
.apply(lambda x: int(x) if str(x) not in ("", "none", "None", " ", "<NA>") else None)
.astype(desired_type)
)
return df