def main()

in arctic_inference/suffix_decoding/simulator.py [0:0]


def main(args: argparse.Namespace):
    """Run ``process_task`` over every combination of the sweep options.

    The evaluation dataset (and the optional training dataset) returned by
    ``get_data`` is tokenized with ``args.tokenizer`` when one is given, and
    otherwise passed through ``ensure_tokenized``. The Cartesian product of
    the sweep options is then expanded into one task per combination; each
    task is handed to ``process_task`` together with both datasets and the
    task's index. All returned records are collected into a DataFrame, a
    summary is printed, and the full table is written to ``args.output`` as
    CSV when that path is set.

    Args:
        args: Parsed command-line arguments. Besides the sweep options
            listed in the body, ``tokenizer``, ``parallel`` and ``output``
            are read from it.
    """
    dataset, train_dataset = get_data(args)

    # Without a tokenizer the data is only checked; with one it is tokenized.
    if args.tokenizer is None:
        ensure_tokenized(dataset)
        if train_dataset is not None:
            ensure_tokenized(train_dataset)
    else:
        dataset = tokenize_data(dataset, args.tokenizer)
        if train_dataset is not None:
            train_dataset = tokenize_data(train_dataset, args.tokenizer)

    # Sweep options, in the positional order process_task receives them.
    # A falsy num_eval / num_train is replaced by a single None entry so the
    # product below still yields at least one combination for that axis.
    configs = OrderedDict()
    configs["num_eval"] = args.num_eval or [None]
    configs["num_train"] = args.num_train or [None]
    for option in (
        "seed",
        "max_depth",
        "max_spec_tokens",
        "max_spec_factor",
        "min_token_prob",
        "use_tree_spec",
        "use_cached_prompt",
        "evict_fraction",
        "evict_strategy",
        "max_cached_requests",
    ):
        configs[option] = getattr(args, option)

    # One argument tuple per combination: datasets, task index, then values.
    tasks = [
        (dataset, train_dataset, index, *combo)
        for index, combo in enumerate(itertools.product(*configs.values()))
    ]

    # Use a worker pool only when more than one process was requested.
    use_pool = bool(args.parallel) and args.parallel > 1
    if use_pool:
        with mp.Pool(args.parallel) as pool:
            records = [
                row
                for batch in pool.starmap(process_task, tasks)
                for row in batch
            ]
    else:
        records = [row for task in tasks for row in process_task(*task)]

    print("Preparing results...")

    df = pd.DataFrame.from_records(records)
    summary = results_summary(df, list(configs))

    print("\nSummary of results:\n")
    print(summary.to_string() + "\n")

    if args.output is None:
        return
    df.to_csv(args.output, index=False)
    print(f"Detailed results saved to: {args.output}")