in arctic_inference/suffix_decoding/simulator.py [0:0]
def get_data(args: argparse.Namespace) -> Tuple[pd.DataFrame,
                                                 Optional[pd.DataFrame]]:
    """Load the evaluation data and, when configured, separate training data.

    Both files are read through ``read_data_file`` using the prompt column,
    response column and format taken from ``args``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``dataset``, ``train_dataset``, ``prompt_column``,
            ``response_column``, ``format``, ``num_eval`` and ``num_train``.
            ``num_eval`` / ``num_train`` are treated as collections of counts
            (their maximum is what gets validated); a falsy value falls back
            to 1 evaluation example and 0 training examples respectively.

    Returns:
        A ``(dataset, train_dataset)`` pair. ``train_dataset`` is ``None``
        when ``args.train_dataset`` is ``None``.

    Raises:
        ValueError: If the largest requested example count does not fit in
            the corresponding dataset. Without a separate training file the
            evaluation and training counts must fit in ``dataset`` together.
    """

    def _load(path):
        # Both files share the same column names and on-disk format.
        return read_data_file(path, args.prompt_column,
                              args.response_column, args.format)

    eval_frame = _load(args.dataset)

    # Only the largest requested size matters for validation.
    eval_needed = 1
    if args.num_eval:
        eval_needed = max(args.num_eval)
    train_needed = 0
    if args.num_train:
        train_needed = max(args.num_train)

    if args.train_dataset is None:
        # Evaluation and training examples are both drawn from ``dataset``,
        # so their combined count has to fit.
        if len(eval_frame) < eval_needed + train_needed:
            raise ValueError(
                f"Number of evaluation examples ({eval_needed}) and training "
                f"examples ({train_needed}) exceed the size of the dataset "
                f"({len(eval_frame)})"
            )
        return eval_frame, None

    # A dedicated training file exists: validate each dataset on its own,
    # and only for the counts that were explicitly requested.
    train_frame = _load(args.train_dataset)
    if args.num_eval and len(eval_frame) < eval_needed:
        raise ValueError(
            f"Number of evaluation examples ({eval_needed}) exceeds the "
            f"size of the dataset ({len(eval_frame)})"
        )
    if args.num_train and len(train_frame) < train_needed:
        raise ValueError(
            f"Number of training examples ({train_needed}) exceeds the "
            f"size of the training dataset ({len(train_frame)})"
        )
    return eval_frame, train_frame