in ludwig/train.py [0:0]
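# Imports needed by this function (not shown in the original excerpt).
# argparse, logging, and yaml are referenced directly below; the other
# names used (logging_level_registry, print_ludwig, set_on_master,
# is_on_master, LUDWIG_VERSION, full_train) are Ludwig-internal and come
# from elsewhere in the package, so their import lines are omitted here.
import argparse
import logging

import yaml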
def cli(sys_argv):
parser = argparse.ArgumentParser(
description='This script trains a model',
prog='ludwig train',
usage='%(prog)s [options]'
)
# ----------------------------
# Experiment naming parameters
# ----------------------------
parser.add_argument(
'--output_directory',
type=str,
default='results',
help='directory that contains the results'
)
parser.add_argument(
'--experiment_name',
type=str,
default='experiment',
help='experiment name'
)
parser.add_argument(
'--model_name',
type=str,
default='run',
help='name for the model'
)
# ---------------
# Data parameters
# ---------------
parser.add_argument(
'--data_csv',
help='input data CSV file. '
'If it has a split column, it will be used for splitting '
'(0: train, 1: validation, 2: test), '
'otherwise the dataset will be randomly split'
)
parser.add_argument('--data_train_csv', help='input train data CSV file')
parser.add_argument(
'--data_validation_csv',
help='input validation data CSV file'
)
parser.add_argument('--data_test_csv', help='input test data CSV file')
parser.add_argument(
'--data_hdf5',
        help='input data HDF5 file. It is an intermediate preprocessed '
             'version of the input CSV, created the first time a CSV '
             'file is used, in the same directory with the same name '
             'and an hdf5 extension'
)
parser.add_argument(
'--data_train_hdf5',
        help='input train data HDF5 file. It is an intermediate '
             'preprocessed version of the input CSV, created the first '
             'time a CSV file is used, in the same directory with the '
             'same name and an hdf5 extension'
)
parser.add_argument(
'--data_validation_hdf5',
        help='input validation data HDF5 file. It is an intermediate '
             'preprocessed version of the input CSV, created the first '
             'time a CSV file is used, in the same directory with the '
             'same name and an hdf5 extension'
)
parser.add_argument(
'--data_test_hdf5',
        help='input test data HDF5 file. It is an intermediate '
             'preprocessed version of the input CSV, created the first '
             'time a CSV file is used, in the same directory with the '
             'same name and an hdf5 extension'
)
parser.add_argument(
'--train_set_metadata_json',
        help='input metadata JSON file. It is an intermediate '
             'preprocessed file containing the mappings of the input '
             'CSV, created the first time a CSV file is used, in the '
             'same directory with the same name and a json extension'
)
parser.add_argument(
'-sspi',
'--skip_save_processed_input',
help='skips saving intermediate HDF5 and JSON files',
action='store_true',
default=False
)
# ----------------
# Model parameters
# ----------------
model_definition = parser.add_mutually_exclusive_group(required=True)
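    # required=True on the group makes argparse enforce that exactly one
    # of --model_definition / --model_definition_file is provided.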
model_definition.add_argument(
'-md',
'--model_definition',
type=yaml.safe_load,
help='model definition'
)
model_definition.add_argument(
'-mdf',
'--model_definition_file',
        help='YAML file describing the model. '
             'Mutually exclusive with --model_definition'
)
parser.add_argument(
'-mlp',
'--model_load_path',
help='path of a pretrained model to load as initialization'
)
parser.add_argument(
'-mrp',
'--model_resume_path',
        help='path of the model directory to resume training from'
)
parser.add_argument(
'-sstd',
'--skip_save_training_description',
action='store_true',
default=False,
help='disables saving the description JSON file'
)
parser.add_argument(
'-ssts',
'--skip_save_training_statistics',
action='store_true',
default=False,
        help='disables saving the training statistics JSON file'
)
parser.add_argument(
'-ssm',
'--skip_save_model',
action='store_true',
default=False,
        help='disables saving weights each time the model improves. '
             'By default Ludwig saves weights after each epoch in which '
             'the validation metric improves, but for very large models '
             'this can be time consuming. If you do not want to keep '
             'the weights and only want to find out what performance a '
             'model can achieve with a set of hyperparameters, use this '
             'parameter to skip it'
)
parser.add_argument(
'-ssp',
'--skip_save_progress',
action='store_true',
default=False,
        help='disables saving weights after each epoch. By default '
             'Ludwig saves weights after each epoch to enable resuming '
             'of training, but if the model is really big this can be '
             'time consuming and will use twice as much disk space. '
             'Use this parameter to skip it'
)
parser.add_argument(
'-ssl',
'--skip_save_log',
action='store_true',
default=False,
        help='disables saving TensorBoard logs. By default Ludwig saves '
             'TensorBoard logs, but if they are not needed, turning '
             'them off can slightly increase the overall speed'
)
# ------------------
# Runtime parameters
# ------------------
parser.add_argument(
'-rs',
'--random_seed',
type=int,
default=42,
help='a random seed that is going to be used anywhere there is a call '
'to a random number generator: data splitting, parameter '
'initialization and training set shuffling'
)
parser.add_argument(
'-g',
'--gpus',
nargs='+',
type=int,
default=None,
        help='list of GPUs to use'
)
parser.add_argument(
'-gml',
'--gpu_memory_limit',
type=int,
default=None,
help='maximum memory in MB to allocate per GPU device'
)
parser.add_argument(
'-dpt',
'--disable_parallel_threads',
action='store_false',
dest='allow_parallel_threads',
        help='prevents TensorFlow from using multithreading, '
             'for reproducibility'
)
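    # Note the inverted flag above: passing -dpt stores False into
    # args.allow_parallel_threads (action='store_false' with a dest).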
parser.add_argument(
'-uh',
'--use_horovod',
action='store_true',
default=None,
        help='uses Horovod for distributed training'
)
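    # default=None (rather than False) means an unspecified flag reaches
    # set_on_master() as None, presumably so it can auto-detect whether
    # Horovod is in use; this reading is an inference from the code.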
parser.add_argument(
'-dbg',
'--debug',
action='store_true',
        default=False,
        help='enables debugging mode'
)
parser.add_argument(
'-l',
'--logging_level',
default='info',
help='the level of logging to use',
choices=['critical', 'error', 'warning', 'info', 'debug', 'notset']
)
args = parser.parse_args(sys_argv)
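    # Map the CLI logging level string (e.g. 'info') to the corresponding
    # logging module constant via Ludwig's logging_level_registry table.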
logging.getLogger('ludwig').setLevel(
logging_level_registry[args.logging_level]
)
global logger
logger = logging.getLogger('ludwig.train')
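    # set_on_master configures whether this process acts as the Horovod
    # master; only the master prints the version banner, while full_train
    # runs on every process with the parsed CLI arguments expanded as
    # keyword arguments.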
set_on_master(args.use_horovod)
if is_on_master():
print_ludwig('Train', LUDWIG_VERSION)
full_train(**vars(args))
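

# Minimal direct-execution sketch, assuming this module is run as a
# script; Ludwig's actual entry-point wiring (e.g. contrib hooks) may
# differ.
if __name__ == '__main__':
    import sys

    cli(sys.argv[1:])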