def main()

in scripts/zeno_visualize.py [0:0]
81 lines of code
17 McCabe index (conditional complexity)

def main():
    """Upload the results of your benchmark tasks to the Zeno AI evaluation platform.

    This script expects your results to live in a data folder where subfolders contain results of individual models.

    Only tasks present for every model are uploaded. For each such task a
    Zeno project is created (dataset and metrics are taken from the first
    model), and every model is then uploaded to it as a system.

    Raises:
        KeyError: If the ``ZENO_API_KEY`` environment variable is not set.
        AssertionError: If no model directories are found, or the models
            have no task in common.
    """
    args = parse_args()

    client = ZenoClient(os.environ["ZENO_API_KEY"])

    # Get all model subfolders from the parent data folder.
    models = [
        os.path.basename(os.path.normpath(f))
        for f in os.scandir(Path(args.data_path))
        if f.is_dir()
    ]

    assert len(models) > 0, "No model directories found in the data_path."

    # Get the tasks from the latest results file of the first model.
    tasks = set(tasks_for_model(models[0], args.data_path))

    # Get tasks names from the latest results file for each model
    # Get intersection of tasks for all models
    for model in models:
        old_tasks = tasks.copy()
        model_tasks = set(tasks_for_model(model, args.data_path))
        # set.intersection() returns a new set and leaves `tasks` untouched;
        # the in-place variant is required to actually narrow the task set.
        tasks.intersection_update(model_tasks)

        if len(old_tasks) != len(tasks):
            eval_logger.warning(
                f"All models must have the same tasks. {model} has tasks: {model_tasks} but have already recorded tasks: {old_tasks}. Taking intersection {tasks}"
            )

    assert (
        len(tasks) > 0
    ), "Must provide at least one task in common amongst models to compare."

    for task in tasks:
        # Upload data for all models
        for model_index, model in enumerate(models):
            # Get latest results and sample results for a model
            model_dir = Path(args.data_path, model)
            model_files = [f.as_posix() for f in model_dir.iterdir() if f.is_file()]
            model_results_filenames = get_results_filenames(model_files)
            model_sample_filenames = get_sample_results_filenames(model_files)
            latest_results = get_latest_filename(
                [Path(f).name for f in model_results_filenames]
            )
            latest_sample_results = get_latest_filename(
                [Path(f).name for f in model_sample_filenames if task in f]
            )

            # Parse the results file once and close it deterministically; it
            # provides both the model args and the per-task configs.
            with open(
                Path(args.data_path, model, latest_results), encoding="utf-8"
            ) as results_file:
                results = json.load(results_file)

            # Collapse runs of the listed punctuation characters into "__".
            model_args = re.sub(
                r"[\"<>:/\|\\?\*\[\]]+",
                "__",
                results["config"]["model_args"],
            )
            print(model_args)

            # The sample results file holds one JSON document per line.
            with open(
                Path(args.data_path, model, latest_sample_results),
                "r",
                encoding="utf-8",
            ) as file:
                data = [json.loads(line.strip()) for line in file]

            config = results["configs"][task]

            if model_index == 0:  # Only need to assemble data for the first model
                metrics = [
                    ZenoMetric(
                        name=metric["metric"],
                        type="mean",
                        columns=[metric["metric"]],
                    )
                    for metric in config["metric_list"]
                ]
                project = client.create_project(
                    # Suffix the task name only when several projects are created.
                    name=args.project_name + (f"_{task}" if len(tasks) > 1 else ""),
                    view="text-classification",
                    metrics=metrics,
                )
                project.upload_dataset(
                    generate_dataset(data, config),
                    id_column="id",
                    data_column="data",
                    label_column="labels",
                )

            # `project` was created on the first model of this task.
            project.upload_system(
                generate_system_df(data, config),
                name=model,
                id_column="id",
                output_column="output",
            )