# ludwig/hyperopt_cli.py
import argparse
import logging

import yaml

# hyperopt, logging_level_registry, print_ludwig, set_on_master, is_on_master
# and LUDWIG_VERSION are imported from other ludwig modules in the full file.
def cli(sys_argv):
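    """Command-line interface for the `ludwig hyperopt` command.

    Parses sys_argv, configures logging, and forwards all parsed arguments
    to hyperopt() as keyword arguments.
    """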
    parser = argparse.ArgumentParser(
        description="This script searches for optimal hyperparameters",
        prog="ludwig hyperopt",
        usage="%(prog)s [options]",
    )
    # -------------------
    # Hyperopt parameters
    # -------------------
    parser.add_argument(
        "-sshs",
        "--skip_save_hyperopt_statistics",
        help="skips saving hyperopt statistics file",
        action="store_true",
        default=False,
    )
    # ----------------------------
    # Experiment naming parameters
    # ----------------------------
    parser.add_argument(
        "--output_directory",
        type=str,
        default="results",
        help="directory that contains the results",
    )
    parser.add_argument(
        "--experiment_name", type=str, default="hyperopt",
        help="experiment name"
    )
    parser.add_argument(
        "--model_name", type=str, default="run", help="name for the model"
    )
    # ---------------
    # Data parameters
    # ---------------
    parser.add_argument(
        "--data_csv",
        help="input data CSV file. "
             "If it has a split column, it will be used for splitting "
             "(0: train, 1: validation, 2: test), "
             "otherwise the dataset will be randomly split",
    )
    parser.add_argument("--data_train_csv", help="input train data CSV file")
    parser.add_argument("--data_validation_csv",
                        help="input validation data CSV file")
    parser.add_argument("--data_test_csv", help="input test data CSV file")
    parser.add_argument(
        "--data_hdf5",
        help="input data HDF5 file. It is an intermediate preprocessed "
             "version of the input CSV, created the first time a CSV file "
             "is used, in the same directory, with the same name and an "
             "hdf5 extension",
    )
    parser.add_argument(
        "--data_train_hdf5",
        help="input train data HDF5 file. It is an intermediate "
             "preprocessed version of the input CSV, created the first "
             "time a CSV file is used, in the same directory, with the "
             "same name and an hdf5 extension",
    )
    parser.add_argument(
        "--data_validation_hdf5",
        help="input validation data HDF5 file. It is an intermediate "
             "preprocessed version of the input CSV, created the first "
             "time a CSV file is used, in the same directory, with the "
             "same name and an hdf5 extension",
    )
    parser.add_argument(
        "--data_test_hdf5",
        help="input test data HDF5 file. It is an intermediate "
             "preprocessed version of the input CSV, created the first "
             "time a CSV file is used, in the same directory, with the "
             "same name and an hdf5 extension",
    )
    parser.add_argument(
        "--train_set_metadata_json",
        help="input metadata JSON file. It is an intermediate preprocessing "
             "file containing the mappings of the input CSV, created the "
             "first time a CSV file is used, in the same directory, with "
             "the same name and a json extension",
    )
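    # Illustrative of the naming convention described above: using
    # /data/reviews.csv for the first time would produce the cache files
    # /data/reviews.hdf5 and /data/reviews.json next to it.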
    parser.add_argument(
        "-sspi",
        "--skip_save_processed_input",
        help="skips saving intermediate HDF5 and JSON files",
        action="store_true",
        default=False,
    )
    # ----------------
    # Model parameters
    # ----------------
    model_definition = parser.add_mutually_exclusive_group(required=True)
    model_definition.add_argument(
        "-md", "--model_definition", type=yaml.safe_load,
        help="model definition"
    )
    model_definition.add_argument(
        "-mdf",
        "--model_definition_file",
        help="YAML file describing the model. Ignores --model_hyperparameters",
    )
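    # Illustrative only: because -md is parsed with yaml.safe_load, the model
    # definition can be passed inline as a YAML/JSON string, e.g.
    #   ludwig hyperopt -md '{input_features: [{name: text, type: text}],
    #                         output_features: [{name: class, type: category}]}'
    # while -mdf points to the same structure stored in a file.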
    parser.add_argument(
        "-mlp",
        "--model_load_path",
        help="path of a pretrained model to load as initialization",
    )
    parser.add_argument(
        "-mrp",
        "--model_resume_path",
        help="path of the model directory to resume training of",
    )
    parser.add_argument(
        "-sstd",
        "--skip_save_training_description",
        action="store_true",
        default=False,
        help="disables saving the description JSON file",
    )
    parser.add_argument(
        "-ssts",
        "--skip_save_training_statistics",
        action="store_true",
        default=False,
        help="disables saving the training statistics JSON file",
    )
    parser.add_argument(
        "-ssm",
        "--skip_save_model",
        action="store_true",
        default=False,
        help="disables saving weights each time the model improves. "
             "By default Ludwig saves the weights after each epoch in which "
             "the validation metric improves, but for a really big model "
             "that can be time consuming. If you do not want to keep "
             "the weights and just want to find out what performance a "
             "model can reach with a set of hyperparameters, use this "
             "parameter to skip it",
    )
    parser.add_argument(
        "-ssp",
        "--skip_save_progress",
        action="store_true",
        default=False,
        help="disables saving weights after each epoch. By default Ludwig "
             "saves weights after each epoch to enable resuming training, "
             "but for a really big model that can be time consuming and "
             "use twice as much storage; use this parameter to skip it",
    )
    parser.add_argument(
        "-ssl",
        "--skip_save_log",
        action="store_true",
        default=False,
        help="disables saving TensorBoard logs. By default Ludwig saves "
             "them, but if they are not needed, turning this off can "
             "slightly increase the overall speed",
    )
    # ------------------
    # Runtime parameters
    # ------------------
    parser.add_argument(
        "-rs",
        "--random_seed",
        type=int,
        default=42,
        help="a random seed that is going to be used anywhere there is a "
             "call to a random number generator: data splitting, parameter "
             "initialization and training set shuffling",
    )
    parser.add_argument(
        "-g", "--gpus", nargs="+", type=int, default=None,
        help="list of gpus to use"
    )
    parser.add_argument(
        "-gml",
        "--gpu_memory_limit",
        type=int,
        default=None,
        help="maximum memory in MB to allocate per GPU device",
    )
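    # Illustrative only: `ludwig hyperopt --gpus 0 1 -gml 4096 ...` would
    # run on the first two GPUs with at most 4096 MB allocated on each.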
    parser.add_argument(
        "-uh",
        "--use_horovod",
        action="store_true",
        default=False,
        help="uses horovod for distributed training",
    )
    parser.add_argument(
        "-dbg",
        "--debug",
        action="store_true",
        default=False,
        help="enables debugging mode",
    )
    parser.add_argument(
        "-l",
        "--logging_level",
        default="info",
        help="the level of logging to use",
        choices=["critical", "error", "warning", "info", "debug", "notset"],
    )
    args = parser.parse_args(sys_argv)

    logging.getLogger("ludwig").setLevel(
        logging_level_registry[args.logging_level]
    )
    global logger
    logger = logging.getLogger("ludwig.hyperopt")

    set_on_master(args.use_horovod)

    if is_on_master():
        print_ludwig("Hyperopt", LUDWIG_VERSION)

    hyperopt(**vars(args))
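

# A minimal sketch of a direct entry point (assumed here; the real module
# may register the command differently through ludwig's CLI machinery):
if __name__ == "__main__":
    import sys

    cli(sys.argv[1:])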