def taskinfo_selector()

in src/lighteval/tasks/registry.py [0:0]
32 lines of code
18 McCabe index (conditional complexity)

    def taskinfo_selector(self, tasks: str) -> dict[str, list[dict]]:
        """
        Convert an input string of task names into few-shot settings keyed by task.

        Args:
            tasks (str): Either a path to an existing file listing one task definition
                per line (blank lines and lines starting with "#" are ignored), or a
                comma-separated string of task definitions. Each task definition can be:
                - a task group defined in TASKS_GROUPS dict in custom tasks file
                - a task name with few shot in format "suite|task|few_shot|truncate_few_shots"
                - a task superset in format "suite|task_superset|few_shot|truncate_few_shots"
                  (superset will run all tasks with format
                  "suite|task_superset:{subset}|few_shot|truncate_few_shots")

        Returns:
            dict[str, list[dict]]: A mapping from each task name yielded by
                `self.expand_task_definition("suite|task")` to a list of dicts of the form
                `{"fewshots": int, "truncate_fewshots": bool}`, one per matching definition.
                The returned object is a `collections.defaultdict(list)`.

        Raises:
            ValueError: If a definition does not have exactly four "|"-separated fields,
                if few_shot or truncate_few_shots is not an integer, or if
                truncate_few_shots is not 0 or 1.
        """
        few_shot_dict = collections.defaultdict(list)

        # We can provide a path to a file with a list of tasks or a string of comma-separated tasks
        if os.path.exists(tasks):
            with open(tasks, "r", encoding="utf-8") as f:
                stripped_lines = (line.strip() for line in f)
                # Test the stripped line so that indented comment lines are skipped as well
                tasks_list = [line for line in stripped_lines if line and not line.startswith("#")]
        else:
            # Tolerate whitespace around the separators (e.g. "a|b|0|0, c|d|0|0")
            tasks_list = [entry.strip() for entry in tasks.split(",")]

        # At this point the strings are either task name/superset name or group names
        # Here we deal with group names and map them to corresponding tasks
        expanded_tasks_list: list[str] = []
        for maybe_task_group in tasks_list:
            # We either expand the group (in case it's a group name), or we keep it as is
            # (in case it's a task name or superset name)
            expanded_tasks = self.task_groups_dict.get(maybe_task_group, [maybe_task_group])
            if len(expanded_tasks) > 1:
                logger.info(f"Expanding task group {maybe_task_group} to {expanded_tasks}")
            expanded_tasks_list.extend(expanded_tasks)

        for task in expanded_tasks_list:
            try:
                suite_name, task_name, few_shot_field, truncate_field = task.split("|")
                # Both numeric fields are parsed here so that any malformed value is
                # reported with the expected-format message below.
                few_shot = int(few_shot_field)
                truncate_flag = int(truncate_field)
            except ValueError as err:
                raise ValueError(
                    f"Cannot get task info from {task}. correct format is suite|task|few_shot|truncate_few_shots"
                ) from err

            if truncate_flag not in (0, 1):
                raise ValueError(f"TruncateFewShots must be 0 or 1, got {truncate_flag}")

            truncate_few_shots = bool(truncate_flag)

            if suite_name not in DEFAULT_SUITES:
                logger.warning(
                    f"Suite {suite_name} unknown. This is not normal, unless you are testing adding new evaluations."
                )

            # This adds support for task supersets (eg: mmlu -> all the mmlu tasks)
            for expanded_task in self.expand_task_definition(f"{suite_name}|{task_name}"):
                # Store few_shot info for each task name (suite|task)
                few_shot_dict[expanded_task].append({"fewshots": few_shot, "truncate_fewshots": truncate_few_shots})

        return few_shot_dict