in evaluation_pipeline/retrieval.py [0:0]
def process_history(row_limit, history_file_path):
    """Load browsing history from a CSV file and build combined text columns.

    The CSV is expected to provide the columns ``last_visit_date``
    (parsed as microseconds since the Unix epoch), ``title``,
    ``description`` and ``url``.

    Args:
        row_limit: Number of leading rows to keep, passed to
            ``DataFrame.head``.
        history_file_path: Path of the CSV file to read.

    Returns:
        A DataFrame with a fresh 0..n-1 index, containing the original
        columns plus:

        * ``last_visit_date`` converted to datetimes, with missing values
          replaced by ``1970-01-01``;
        * ``combined_text``: ``title + " " + description``;
        * ``combined_text_url``: ``combined_text + " " + url``.

        Rows whose title and description are both missing or blank are
        dropped. The number of remaining rows is printed to stdout.
    """
    browsing_history = pd.read_csv(history_file_path).head(row_limit)

    # Missing timestamps parse to NaT; replace them with the epoch default.
    visit_dates = pd.to_datetime(browsing_history['last_visit_date'], unit='us')
    browsing_history['last_visit_date'] = visit_dates.fillna(
        pd.to_datetime("1970-01-01")
    )

    title = browsing_history['title'].fillna('')
    description = browsing_history['description'].fillna('')
    url = browsing_history['url'].fillna('')

    browsing_history['combined_text'] = title + " " + description
    # Keep the URL separated from the description so the last word of the
    # description and the URL do not fuse into a single token.
    browsing_history['combined_text_url'] = (
        browsing_history['combined_text'] + " " + url
    )

    # combined_text always contains at least the " " separator, so compare
    # the stripped value; a plain != '' check would never drop a row.
    has_text = browsing_history['combined_text'].str.strip() != ''
    browsing_history = browsing_history.loc[has_text].reset_index(drop=True)

    print(len(browsing_history))
    return browsing_history