in datasets/imdb/pipelines/_images/run_csv_transform_kub/csv_transform.py [0:0]
def add_movie_title(df: pd.DataFrame, source_url_path: str):
    """Attach movie titles to ``df`` using the title-basics file.

    Reads the gzip-compressed, tab-separated file at ``source_url_path``
    (only the ``tconst`` and ``primaryTitle`` columns), renames those
    columns to ``movie_id`` and ``title`` via ``rename_headers``, and
    left-joins the result onto ``df``.

    Args:
        df: Frame to enrich; every one of its rows is kept by the left join.
        source_url_path: Path of the gzip-compressed TSV file. Its last
            ``/``-separated component is used only in a log message.

    Returns:
        The merged DataFrame.
    """
    # Source column -> output column; the keys double as the columns to load.
    column_map = {"tconst": "movie_id", "primaryTitle": "title"}

    logging.info("Started creating Dataframe for title_basics(data).")
    file_name = source_url_path.split("/")[-1]
    logging.info(
        f"\tCreating Dataframe(df_title_basics) for movie_id and title by reading ./files/{file_name}."
    )
    titles = pd.read_csv(
        str(source_url_path),
        sep="\t",
        compression="gzip",
        usecols=list(column_map),
    )

    logging.info(
        "\tRenaming Dataframe(df_title_basics) columns from ['tconst', 'primaryTitle'] -> ['movie_id', 'title']."
    )
    # NOTE(review): the return value is ignored, so rename_headers is
    # presumably renaming in place -- confirm against its definition.
    rename_headers(titles, column_map)

    logging.info(
        "Merging two Dataframes(df_reviews & df_title_basics) by using left-join and assigned to variable df"
    )
    # No explicit key is passed: pandas joins on the column names that the
    # two frames have in common.
    merged = pd.merge(df, titles, how="left")
    logging.info("Successfully created final Dataframe.")
    return merged