def merge_websites_with_user_visits()

in source/sagemaker/data-preparation/data_prep.py [0:0]


def merge_websites_with_user_visits(data_dir, facts, url_data, primary_key, output_dir, logs):
    """Join each user's visit facts with website attributes and write one CSV.

    The input file is read line by line; every non-blank line must be a JSON
    object. Its ``"facts"`` entry is flattened with ``pd.json_normalize`` and
    each resulting row is matched, via ``primary_key``, to a row of
    ``url_data``. The object's ``"uid"`` value is added as a ``uid`` column and
    the rows are appended to the output CSV.

    Args:
        data_dir: Directory containing the input file.
        facts: Name of the JSON Lines input file inside ``data_dir``.
        url_data: DataFrame looked up with ``url_data.loc[...]`` using the
            ``primary_key`` values found in the facts, so its index must
            contain those values.
        primary_key: Name of the column in the flattened facts that
            identifies the matching row of ``url_data``.
        output_dir: Directory in which the CSV is written.
        logs: Name of the output CSV file inside ``output_dir``.

    Raises:
        KeyError: If the flattened facts have no ``primary_key`` column, or
            (depending on the pandas version) if a key is missing from the
            index of ``url_data``.
        json.JSONDecodeError: If a non-blank line is not valid JSON.

    Note:
        The CSV is written with ``index=False`` after ``primary_key`` has been
        moved into the index, so the key column itself is not part of the
        output. The header row is written once, taken from the first record.
        The output file is only created when there is a first record to
        write; an input without records leaves the filesystem untouched.
    """
    in_path = os.path.join(data_dir, facts)
    out_path = os.path.join(output_dir, logs)

    f_out = None
    try:
        with open(in_path) as f_in:
            for line in f_in:
                line = line.strip()
                if not line:
                    # Tolerate blank lines in the JSON Lines input.
                    continue

                record = json.loads(line)
                user_visits = pd.json_normalize(record.get("facts"))
                fids = user_visits[primary_key].values
                # Both frames end up with the same index (the key values, in
                # the same order), so the column-wise concat lines rows up 1:1.
                user_visits = pd.concat(
                    (user_visits.set_index(primary_key), url_data.loc[fids]),
                    axis=1,
                )
                user_visits["uid"] = record.get("uid")

                # Open the output lazily and only once: the first frame
                # truncates the file and carries the header, later frames are
                # appended through the same handle.
                is_first_frame = f_out is None
                if is_first_frame:
                    # newline="" lets pandas control line endings, as its
                    # documentation requires for file objects given to to_csv.
                    f_out = open(out_path, "w", newline="")
                user_visits.to_csv(f_out, index=False, header=is_first_frame)
    finally:
        if f_out is not None:
            f_out.close()