def get_data()

in arctic_inference/suffix_decoding/simulator.py [0:0]


def get_data(args: argparse.Namespace) -> Tuple[pd.DataFrame,
                                                Optional[pd.DataFrame]]:
    """Load the evaluation data and, when configured, separate training data.

    The evaluation dataset is always read from ``args.dataset``. If
    ``args.train_dataset`` is set, a second dataset is read from it using the
    same column and format arguments; otherwise no training dataset is
    returned and both evaluation and training examples must fit in the one
    dataset.

    Args:
        args: Parsed arguments. Reads ``dataset``, ``train_dataset``,
            ``prompt_column``, ``response_column``, ``format``, ``num_eval``
            and ``num_train``. ``num_eval`` / ``num_train`` are treated as
            collections of counts (their maximum is what gets validated); a
            falsy value means "not specified".

    Returns:
        A ``(dataset, train_dataset)`` pair, where ``train_dataset`` is
        ``None`` when ``args.train_dataset`` is ``None``.

    Raises:
        ValueError: If the largest requested number of examples does not fit
            in the corresponding dataset.
    """
    dataset = read_data_file(args.dataset, args.prompt_column,
                             args.response_column, args.format)

    # Only the largest requested sizes matter for validation. With nothing
    # requested, the fallbacks are 1 evaluation example and 0 training ones.
    eval_needed = max(args.num_eval) if args.num_eval else 1
    train_needed = max(args.num_train) if args.num_train else 0

    if args.train_dataset is None:
        # Single dataset: the evaluation and training examples are both
        # counted against it, so their combined size must fit.
        if eval_needed + train_needed > len(dataset):
            raise ValueError(
                f"Number of evaluation examples ({eval_needed}) and training "
                f"examples ({train_needed}) exceed the size of the dataset "
                f"({len(dataset)})"
            )
        return dataset, None

    # Separate training dataset: each explicitly requested count is checked
    # against its own dataset; unspecified counts are not validated here.
    train_dataset = read_data_file(args.train_dataset, args.prompt_column,
                                   args.response_column, args.format)
    if args.num_eval and eval_needed > len(dataset):
        raise ValueError(
            f"Number of evaluation examples ({eval_needed}) exceeds the "
            f"size of the dataset ({len(dataset)})"
        )
    if args.num_train and train_needed > len(train_dataset):
        raise ValueError(
            f"Number of training examples ({train_needed}) exceeds the "
            f"size of the training dataset ({len(train_dataset)})"
        )
    return dataset, train_dataset