in arctic_inference/suffix_decoding/simulator.py [0:0]
def main(args: argparse.Namespace):
    """Run every requested simulation configuration and report the results.

    Steps, in order:
      1. Load the evaluation dataset and the (possibly absent) training
         dataset via ``get_data``.
      2. Tokenize both with ``args.tokenizer`` when one is given; otherwise
         check that they are already tokenized.
      3. Build the Cartesian product of all swept arguments and run
         ``process_task`` once per combination, either serially or in a
         ``multiprocessing`` pool when ``args.parallel`` is greater than 1.
      4. Collect the returned records into a DataFrame, print a summary, and
         write the detailed records to ``args.output`` as CSV when it is set.

    Args:
        args: Parsed command-line arguments. Every swept attribute is
            unpacked into ``itertools.product`` and so must be iterable.
    """
    dataset, train_dataset = get_data(args)

    # Tokenize datasets (if needed)
    tokenizer = args.tokenizer
    if tokenizer is None:
        # No tokenizer supplied: only verify the data is already tokenized.
        ensure_tokenized(dataset)
        if train_dataset is not None:
            ensure_tokenized(train_dataset)
    else:
        dataset = tokenize_data(dataset, tokenizer)
        if train_dataset is not None:
            train_dataset = tokenize_data(train_dataset, tokenizer)

    # Create all possible configurations.  A falsy num_eval / num_train is
    # replaced by a single ``None`` entry so the product below is not empty.
    sweep = OrderedDict([
        ("num_eval", args.num_eval or [None]),
        ("num_train", args.num_train or [None]),
        ("seed", args.seed),
        ("max_depth", args.max_depth),
        ("max_spec_tokens", args.max_spec_tokens),
        ("max_spec_factor", args.max_spec_factor),
        ("min_token_prob", args.min_token_prob),
        ("use_tree_spec", args.use_tree_spec),
        ("use_cached_prompt", args.use_cached_prompt),
        ("evict_fraction", args.evict_fraction),
        ("evict_strategy", args.evict_strategy),
        ("max_cached_requests", args.max_cached_requests),
    ])

    # Each task carries both datasets, its running index, and one combination
    # of swept values, in the same order as the keys of ``sweep``.
    tasks = [
        (dataset, train_dataset, task_id, *combo)
        for task_id, combo in enumerate(itertools.product(*sweep.values()))
    ]

    num_workers = args.parallel
    if num_workers and num_workers > 1:
        with mp.Pool(num_workers) as pool:
            # Pool.starmap blocks until every task has finished.
            per_task_results = pool.starmap(process_task, tasks)
    else:
        # Serial path: tasks run one after another, in order, when the
        # results are flattened below.
        per_task_results = itertools.starmap(process_task, tasks)

    # Flatten the per-task result collections into a single list of records.
    records = list(itertools.chain.from_iterable(per_task_results))

    print("Preparing results...")
    df = pd.DataFrame.from_records(records)
    summary = results_summary(df, list(sweep.keys()))

    print("\nSummary of results:\n")
    print(summary.to_string() + "\n")

    if args.output is None:
        return

    df.to_csv(args.output, index=False)
    print(f"Detailed results saved to: {args.output}")