def convert_json_file_to_csv()

in datasets/hacker_news/pipelines/_images/run_csv_transform_kub/csv_transform.py [0:0]


def _nullable_int_as_text(column: pd.Series) -> pd.Series:
    """Return *column* as integer strings, with missing values as ""."""
    # Cast through the nullable Int64 dtype; its missing values stringify as
    # "<NA>", which is then blanked out.
    as_text = column.astype("Int64").astype("str")
    return as_text.apply(lambda value: "" if value == "<NA>" else value)


def _strip_line_breaks_and_nuls(column: pd.Series) -> pd.Series:
    """Replace CR/LF characters with a space and drop NUL characters."""
    without_breaks = column.replace(r"[\n\r]", " ", regex=True)
    return without_breaks.replace(r"\x00", "", regex=True)


def _epoch_text_to_utc_label(epoch_text: str) -> str:
    """Format epoch seconds (as text) as 'YYYY-MM-DD HH:MM:SS UTC'; "" stays ""."""
    from datetime import timezone

    if epoch_text == "":
        return ""
    # Convert explicitly in UTC: without ``tz`` the result would be expressed
    # in the machine's local timezone while still being labelled "UTC".
    moment = datetime.fromtimestamp(int(epoch_text), tz=timezone.utc)
    return f"{moment.strftime('%Y-%m-%d %H:%M:%S')} UTC"


def convert_json_file_to_csv(source_json_file: str) -> str:
    """Convert a JSON batch file into a pipe-delimited CSV file.

    The JSON document is expected to hold its records under the top-level
    ``"data"`` key. The records are flattened one level, integer-like columns
    are rendered as text, a ``timestamp`` column is derived from ``time``,
    line breaks and NUL characters are removed from ``text`` and ``title``,
    and the result is handed to ``save_to_new_file`` with ``sep="|"``.

    Args:
        source_json_file: Path of the JSON file to convert. The output path is
            this path with every ``".json"`` occurrence replaced by ``".csv"``.

    Returns:
        The path of the CSV file.

    Raises:
        KeyError: If the document has no ``"data"`` key or the records lack
            one of the columns accessed below.
    """
    target_file_batch_csv = source_json_file.replace(".json", ".csv")
    if os.path.isfile(target_file_batch_csv):
        # Remove the [local] batch csv file if it exists
        os.remove(target_file_batch_csv)

    # Only parsing needs the file handle; close it before the transform.
    with open(source_json_file, "r", encoding="utf-8") as source_json:
        data = json.load(source_json)

    df = pd.json_normalize(data["data"], max_level=0)

    for name in ("time", "descendants", "score", "parent"):
        df[name] = _nullable_int_as_text(df[name])
    df["timestamp"] = df["time"].apply(_epoch_text_to_utc_label)
    df["ranking"] = ""

    for name in ("text", "title"):
        df[name] = _strip_line_breaks_and_nuls(df[name])

    # Normalise "deleted" before selecting the output columns so the
    # assignment happens on the original frame rather than on a selection.
    # NOTE(review): only the literal strings "True"/"False" are kept; Python
    # booleans produced by json.load compare unequal to them and are blanked.
    # Confirm this is the intended output.
    df["deleted"] = df["deleted"].apply(
        lambda flag: flag if flag in ("True", "False") else ""
    )

    # Column order of the output file.
    df = df[
        [
            "title",
            "url",
            "text",
            "dead",
            "by",
            "score",
            "time",
            "timestamp",
            "type",
            "id",
            "parent",
            "descendants",
            "ranking",
            "deleted",
        ]
    ]
    save_to_new_file(df, file_path=str(target_file_batch_csv), sep="|")
    return str(target_file_batch_csv)