in src/gen_annotation_data.py [0:0]
def gen_data_for_archetype(self, user: UserArchetype, allow_same_task_twice=False, n_sample_pages=10, retarget_topic=True):
    """Generate synthetic browsing data for a single user archetype.

    A task count is drawn at random from ``TYPICAL_TASK_COUNTS`` and that many
    tasks (capped at the number available) are sampled from the list returned
    by ``self.compute_tasks_for_user``. For every sampled task the language
    model is asked for ``n_sample_pages`` page titles/URLs, a random subset of
    those rows is kept as the "open tabs", and the rows are tagged with the
    task, a per-call ``test_set_id``, a per-task ``task_id`` and the user
    description.

    Args:
        user: Archetype to generate data for; only ``user.user_description``
            is read here.
        allow_same_task_twice: Currently unused by this method; kept for
            interface compatibility.
        n_sample_pages: Number of sample pages requested from the language
            model for each task.
        retarget_topic: Currently unused by this method; kept for interface
            compatibility.

    Returns:
        A DataFrame with the rows of all tasks concatenated. Each task's rows
        are re-indexed from 0 and the concatenation keeps those indices, so
        index values repeat across tasks. When no task was selected an empty
        DataFrame with the expected columns is returned.
    """
    print(f"gen_data_for_archetype {n_sample_pages}")
    data_list = []
    task_count_index = randint(0, len(TYPICAL_TASK_COUNTS) - 1)
    task_count = TYPICAL_TASK_COUNTS[task_count_index]
    # One id shared by every row produced in this call.
    test_set_id = str(uuid.uuid4())
    task_list = self.compute_tasks_for_user(user.user_description, TASKS)
    # Never ask for more tasks than exist; sample() would raise otherwise.
    cur_task_list = pd.Series(task_list).sample(n=min(task_count, len(task_list))).to_list()
    for task in cur_task_list:
        print(f"running task {task}")
        # NOTE: an earlier, disabled experiment asked the LM to rewrite each
        # task into a more specific scenario; tasks are used verbatim here.
        # NOTE(review): assumes ask_df always returns a DataFrame whose columns
        # are the two requested keys -- confirm against the LM wrapper.
        browsing_data = self.lm.ask_df(
            # Trailing space keeps the user description from running straight
            # into the instruction sentence that follows.
            f"We are generating sample browsing data for a user of the following user. {user.user_description} "
            f"Generate {n_sample_pages} sample page titles and URLs for the user performing a specific instance task {task} in a single browser session",
            [TAB_URL_KEY, TAB_TITLE_KEY])
        open_tabs_info = OPEN_TABS_PER_TASK[task] if task in OPEN_TABS_PER_TASK else OPEN_TABS_PER_TASK["default"]
        num_open_tabs = randint(open_tabs_info["min"], open_tabs_info["max"])
        # The LM may return fewer rows than requested; cap the sample size so
        # sample() does not raise for asking more rows than are available.
        num_open_tabs = min(num_open_tabs, len(browsing_data))
        browsing_data = browsing_data.sample(n=num_open_tabs).reset_index(drop=True)
        browsing_data["task"] = task
        browsing_data["test_set_id"] = test_set_id
        # Unique per task, prefixed with the test set id so rows can be grouped.
        browsing_data["task_id"] = f"{test_set_id}_{uuid.uuid4()}"
        browsing_data["user_description"] = user.user_description
        data_list.append(browsing_data)
    if not data_list:
        # pd.concat raises on an empty list; hand back an empty, well-formed
        # frame instead so callers can still concatenate/inspect the result.
        return pd.DataFrame(
            columns=[TAB_URL_KEY, TAB_TITLE_KEY, "task", "test_set_id", "task_id", "user_description"])
    return pd.concat(data_list, axis=0)