def gen_data_for_archetype()

in src/gen_annotation_data.py [0:0]


    def gen_data_for_archetype(self, user: UserArchetype, allow_same_task_twice=False, n_sample_pages=10, retarget_topic=True):
        """Generate synthetic per-task browsing data for one user archetype.

        A random entry of ``TYPICAL_TASK_COUNTS`` decides how many tasks are
        drawn (without replacement) from the tasks that
        ``compute_tasks_for_user`` returns for this user. For each drawn task
        the language model is asked for ``n_sample_pages`` pages, a random
        number of them (bounded by the ``min``/``max`` entry of
        ``OPEN_TABS_PER_TASK``) is kept, and the kept rows are tagged with
        task and test-set metadata.

        Args:
            user: Archetype whose ``user_description`` drives both task
                selection and the language-model prompt.
            allow_same_task_twice: Not read by this method at present; tasks
                are always sampled without replacement.
            n_sample_pages: Number of pages requested from the language model
                for each task.
            retarget_topic: Not read by this method at present.

        Returns:
            A DataFrame holding the kept rows of every task, concatenated
            along axis 0 (per-task indices are not renumbered), with the added
            columns ``task``, ``test_set_id``, ``task_id`` and
            ``user_description``. An empty DataFrame is returned when no task
            was selected.
        """
        print(f"gen_data_for_archetype {n_sample_pages}")
        data_list = []
        task_count_index = randint(0, len(TYPICAL_TASK_COUNTS) - 1)
        task_count = TYPICAL_TASK_COUNTS[task_count_index]
        # One id shared by every task generated in this call.
        test_set_id = str(uuid.uuid4())
        task_list = self.compute_tasks_for_user(user.user_description, TASKS)
        # Never ask for more tasks than are available for this user.
        cur_task_list = pd.Series(task_list).sample(n=min(task_count, len(task_list))).to_list()
        for task in cur_task_list:
            print(f"running task {task}")
            # NOTE: an optional language-model step that rewrote `task` into a
            # more specific imagined scenario used to run here; it is disabled.
            # NOTE(review): "page tiles" in the prompt below looks like a typo
            # for "page titles" -- left untouched, confirm before changing.
            # presumably ask_df returns a DataFrame with the requested columns;
            # verify against the lm implementation.
            browsing_data = self.lm.ask_df(
                # Trailing space keeps the description from running into "Generate".
                f"We are generating sample browsing data for a user of the following user. {user.user_description} "
                f"Generate {n_sample_pages} sample page tiles and URLs for the user performing a specific instance task {task} in a single browser session",
                [TAB_URL_KEY, TAB_TITLE_KEY])
            open_tabs_info = OPEN_TABS_PER_TASK[task] if task in OPEN_TABS_PER_TASK else OPEN_TABS_PER_TASK["default"]
            num_open_tabs = randint(open_tabs_info["min"], open_tabs_info["max"])
            # The model may return fewer rows than requested; sampling more
            # rows than exist (without replacement) raises ValueError.
            num_open_tabs = min(num_open_tabs, len(browsing_data))
            browsing_data = browsing_data.sample(n=num_open_tabs).reset_index(drop=True)
            browsing_data["task"] = task
            browsing_data["test_set_id"] = test_set_id
            # Unique per task, prefixed with the shared test-set id.
            browsing_data["task_id"] = f"{test_set_id}_{uuid.uuid4()}"
            browsing_data["user_description"] = user.user_description
            data_list.append(browsing_data)
        if not data_list:
            # pd.concat raises ValueError on an empty list of frames.
            return pd.DataFrame()
        return pd.concat(data_list, axis=0)