in datasets/imdb/pipelines/_images/run_csv_transform_kub/csv_transform.py [0:0]
def chunk_clean_akas(chunk: pd.DataFrame) -> pd.DataFrame:
    """Clean one chunk of the "akas" table in place and return it.

    Each column is dispatched by its exact label:

    * ``title``, ``region``, ``language``: ``"\\N"`` is mapped to ``None``
      via ``coldata_replace``.
    * ``types``, ``attributes``: ``replace_unicode`` is called with
      ``"\\x02"`` and ``"&"``, then ``"\\N"`` is mapped to ``None``.
    * ``isOriginalTitle``: ``"0"``/``0`` map to ``False``, ``"1"``/``1`` map
      to ``True`` and ``"\\N"`` maps to ``None``.
    * ``title``: additionally passed to ``clean_data`` with ``"\\n"`` and
      ``"|"``.

    Columns with any other label are left untouched.

    NOTE(review): ``coldata_replace``, ``replace_unicode`` and ``clean_data``
    are defined outside this block; they are presumed to modify ``chunk`` in
    place, since their return values are not used here -- confirm.

    Args:
        chunk: The dataframe chunk to clean.

    Returns:
        The same ``chunk`` object that was passed in.
    """
    null_marker = {"\\N": None}
    for col in chunk:
        if col in ("title", "region", "language"):
            coldata_replace(chunk, col, null_marker)
        if col in ("types", "attributes"):
            replace_unicode(chunk, col, "\x02", "&")
            coldata_replace(chunk, col, null_marker)
        # Exact comparison on purpose: `col in ("isOriginalTitle")` is a
        # substring test against a str (no trailing comma, so no tuple). It
        # would match labels such as "Original" and raise TypeError for
        # non-string labels.
        if col == "isOriginalTitle":
            coldata_replace(
                chunk, col, {"0": False, "1": True, "\\N": None, 0: False, 1: True}
            )
        # Same reasoning as above: `col in ("title")` is a substring test.
        if col == "title":
            clean_data(chunk, col, "\n", "|")
    logging.info(f"Dataframe chunk shape {chunk.shape}")
    return chunk