in benchmarking/cli/launch_utils.py [0:0]
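# Context assumed by this excerpt: `argparse` and a module-level `logger`,
# plus `supported_benchmarks`, `benchmark_factory` and
# `SUPPORTED_RESOURCE_FOR_ACQUISITION`, which are defined elsewhere in the
# benchmarking package (their exact import paths are not shown here).
import argparse
import logging

logger = logging.getLogger(__name__)
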
def parse_args(allow_lists_as_values=True):
"""
Argument parser for CLI. Normally, this parameterizes a single experiment.
But if `allow_lists_as_values == True`, certain arguments admit lists as
    values. In this case, one experiment is launched for each combination of
    values (Cartesian product).
:param allow_lists_as_values: See above
    :return: params dict. Note that if an argument added to the parser is not
        assigned a value, it appears in the dict with value None
"""
parser = argparse.ArgumentParser(
description='Asynchronous Hyperparameter Optimization')
# We parse the CL args twice. The first pass parses all global arguments
# (not specific to the benchmark). From that pass, we know what the
# benchmark is. In a second pass, we parse additional benchmark-specific
# arguments, as defined in the default_params for the benchmark.
    if allow_lists_as_values:
        allow_list = dict(nargs='+')
        parser.add_argument('--argument_groups', type=str,
                            help='Specify groups of list arguments, separated '
                                 'by |. Arguments in a group are iterated '
                                 'over together')
    else:
        allow_list = dict()
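    # Illustration (hypothetical invocation; the exact group syntax follows
    # the --argument_groups help string above): passing
    #   --scheduler fifo hyperband --num_workers 2 4
    # launches four experiments (Cartesian product), whereas putting
    # `scheduler` and `num_workers` into one group via --argument_groups
    # makes the two lists iterate together, launching only two experiments.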
# Note: The benchmark cannot be a list argument, since it can define its
# own CL arguments
parser.add_argument('--benchmark_name', type=str,
default='mlp_fashionmnist',
choices=supported_benchmarks(),
help='Benchmark to run experiment on')
parser.add_argument('--skip_initial_experiments', type=int, default=0,
help='When multiple experiments are launched (due to '
'list arguments), this number of initial '
'experiments are skipped')
parser.add_argument('--backend', type=str, default='local',
choices=('local', 'sagemaker', 'simulated'),
help='Backend for training evaluations')
parser.add_argument('--local_tuner', action='store_true',
help='Run tuning experiment locally? Otherwise, it is '
                             'run remotely (which allows running multiple '
                             'tuning experiments in parallel)')
parser.add_argument('--run_id', type=int,
help='Identifier to distinguish between runs '
'(nonnegative integers)',
**allow_list)
parser.add_argument('--num_runs', type=int,
                        help='Number of repetitions, with run_id 0, 1, ... '
                             'Only if run_id is not given (ignored otherwise)')
parser.add_argument('--random_seed_offset', type=int,
help='Master random seed is this plus run_id, modulo '
'2 ** 32. Drawn at random if not given')
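    # For example: with --random_seed_offset 1000 and run_id 3, the master
    # random seed would be (1000 + 3) % 2 ** 32 = 1003.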
parser.add_argument('--instance_type', type=str,
help='SageMaker instance type for workers',
**allow_list)
parser.add_argument('--tuner_instance_type', type=str,
default='ml.c5.xlarge',
help='SageMaker instance type for tuner (only for '
'sagemaker backend and remote tuning)',
**allow_list)
parser.add_argument('--num_workers', type=int,
help='Number of workers (parallel evaluations)',
**allow_list)
parser.add_argument('--image_uri', type=str,
help='URI of Docker image (sagemaker backend)')
parser.add_argument('--sagemaker_execution_role', type=str,
help='SageMaker execution role (sagemaker backend)')
parser.add_argument('--experiment_name', type=str,
help='Experiment name (used as job_name_prefix in '
'sagemaker backend)')
parser.add_argument('--no_debug_log', action='store_true',
help='Switch off verbose logging')
parser.add_argument('--debug_log_level', action='store_true',
help='Set logging level to DEBUG (default is INFO)')
parser.add_argument('--no_tuner_logging', action='store_true',
help='By default, the full tuning status is logged '
'in the tuning loop every --print_update_interval'
' secs. If this is set, this logging is suppressed')
parser.add_argument('--enable_sagemaker_profiler', action='store_true',
                        help='Enable SageMaker profiler (this needs one '
                             'processing job for each training job)')
parser.add_argument('--no_experiment_subdirectory', action='store_true',
help='When storing results, do not use subdirectory '
'experiment_name')
parser.add_argument('--cost_model_type', type=str,
help='Selects cost model of benchmark',
**allow_list)
parser.add_argument('--scheduler', type=str, default='fifo',
help='Scheduler name',
**allow_list)
parser.add_argument('--searcher', type=str,
help='Searcher name',
**allow_list)
parser.add_argument('--results_update_interval', type=int, default=300,
help='Results and tuner state are stored every this '
'many seconds')
parser.add_argument('--print_update_interval', type=int, default=300,
help='Tuner status printed every this many seconds')
parser.add_argument('--tuner_sleep_time', type=float, default=5,
help='Tuner tries to fetch new results every this '
'many seconds')
parser.add_argument('--max_resource_level', type=int,
help='Largest resource level (e.g., epoch number) '
'for training evaluations',
**allow_list)
parser.add_argument('--epochs', type=int,
help='Deprecated: Use max_resource_level instead',
**allow_list)
parser.add_argument('--num_trials', type=int,
help='Maximum number of trials',
**allow_list)
parser.add_argument('--scheduler_timeout', type=int,
                        help='Trials are started until this cutoff time '
                             '(in secs)',
**allow_list)
parser.add_argument('--max_failures', type=int, default=1,
                        help='The tuning job terminates once this many '
                             'training evaluations have failed',
**allow_list)
parser.add_argument('--s3_bucket', type=str,
help='S3 bucket to write checkpoints and results to. '
'Defaults to default bucket of session')
parser.add_argument('--no_gpu_rotation', action='store_true',
help='For local back-end on a GPU instance: By '
'default, trials are launched in parallel '
'on different GPU cores (GPU rotation). If '
'this is set, all GPU cores are used for a '
'single evaluation')
parser.add_argument('--blackbox_repo_s3_root', type=str,
help='S3 root directory for blackbox repository. '
'Defaults to default bucket of session')
parser.add_argument('--blackbox_seed', type=int,
                        help='Fix seeds of blackbox queries to this value '
                             '(0 is safe), so that they return the same '
                             'metric values for the same config')
# Arguments for scheduler
parser.add_argument('--brackets', type=int,
help='Number of brackets in HyperbandScheduler',
**allow_list)
parser.add_argument('--reduction_factor', type=float,
help='Reduction factor in HyperbandScheduler',
**allow_list)
parser.add_argument('--grace_period', type=int,
help='Minimum resource level (e.g., epoch number) '
'in HyperbandScheduler',
**allow_list)
parser.add_argument('--rung_levels', type=str,
help='List of resource levels to use for the rungs '
'in HyperbandScheduler. Entries must be positive '
'ints. Overrides --grace_period, '
'--reduction_factor if given',
**allow_list)
parser.add_argument('--no_rung_system_per_bracket', action='store_true',
help='Parameter of HyperbandScheduler')
parser.add_argument('--searcher_data', type=str,
help='Parameter of HyperbandScheduler',
**allow_list)
parser.add_argument('--register_pending_myopic', action='store_true',
help='Parameter of HyperbandScheduler')
parser.add_argument('--not_normalize_targets', action='store_true',
help='Do not normalize targets to mean 0, variance 1'
' before fitting surrogate model')
parser.add_argument('--pasha_ranking_criterion', type=str,
help='Parameter of PASHA scheduler',
**allow_list)
parser.add_argument('--pasha_epsilon', type=float,
help='Parameter of PASHA scheduler',
**allow_list)
parser.add_argument('--pasha_epsilon_scaling', type=str,
help='Parameter of PASHA scheduler',
**allow_list)
# Arguments for bayesopt searcher
parser.add_argument('--searcher_model', type=str,
help='Surrogate model for bayesopt searcher with '
'HyperbandScheduler',
**allow_list)
parser.add_argument('--searcher_num_init_random', type=int,
help='Number of initial trials not chosen by searcher',
**allow_list)
parser.add_argument('--searcher_num_init_candidates', type=int,
help='Number of random candidates scored to seed search',
**allow_list)
parser.add_argument('--searcher_num_fantasy_samples', type=int,
help='Number of fantasy samples',
**allow_list)
help_str = "Rule for resource level at which acquisition function is used " +\
f"[{SUPPORTED_RESOURCE_FOR_ACQUISITION}]"
parser.add_argument('--searcher_resource_acq', type=str,
help=help_str,
**allow_list)
parser.add_argument('--searcher_resource_acq_bohb_threshold', type=int,
help='Parameter for resource_acq == bohb',
**allow_list)
parser.add_argument('--searcher_gp_resource_kernel', type=str,
help='Multi-task kernel for HyperbandScheduler',
**allow_list)
parser.add_argument('--searcher_opt_skip_period', type=int,
help='Update GP hyperparameters only every (...) times',
**allow_list)
parser.add_argument('--searcher_opt_skip_init_length', type=int,
help='Update GP hyperparameters every time until '
'(...) observations are done',
**allow_list)
parser.add_argument('--searcher_opt_skip_num_max_resource',
action='store_true',
help='Update GP hyperparameters only when training '
'runs reach max_t')
parser.add_argument('--searcher_opt_nstarts', type=int,
help='GP hyperparameter optimization restarted (...) '
'times',
**allow_list)
parser.add_argument('--searcher_opt_maxiter', type=int,
help='Maximum number of iterations of GP '
'hyperparameter optimization',
**allow_list)
parser.add_argument('--searcher_initial_scoring', type=str,
help='Scoring function to rank initial candidates '
'for seeding search [thompson_indep, acq_func]',
**allow_list)
parser.add_argument('--searcher_issm_gamma_one', action='store_true',
help='Fix gamma parameter of ISSM to one?')
parser.add_argument('--searcher_exponent_cost', type=float,
help='Exponent of cost term in cost-aware expected '
'improvement acquisition function',
**allow_list)
parser.add_argument('--searcher_expdecay_normalize_inputs', action='store_true',
help='Normalize resource values to [0, 1] in '
'GP-expdecay surrogate model (only if '
'searcher_model = gp_expdecay)')
parser.add_argument('--searcher_num_init_candidates_for_batch', type=int,
help='Relevant for synchronous Hyperband with bayesopt '
                             'searcher. If a batch of size B is suggested, the '
'first suggest uses searcher_num_init_candidates, '
'the B-1 subsequent suggests use this value',
**allow_list)
parser.add_argument('--searcher_use_old_code',
action='store_true',
help='DEBUG: Use old code for gp_issm, gp_expdecay')
parser.add_argument('--searcher_no_fantasizing', action='store_true',
help='Ignore pending evaluations, do not use fantasizing')
# Arguments for kde searcher
parser.add_argument('--searcher_num_min_data_points', type=int,
help='KDE: Minimum number of datapoints needed to fit models',
**allow_list)
parser.add_argument('--searcher_top_n_percent', type=int,
                        help='KDE: Top (bottom) model is fit on the top (bottom) fraction of data',
**allow_list)
parser.add_argument('--searcher_min_bandwidth', type=float,
help='KDE: Minimum bandwidth',
**allow_list)
parser.add_argument('--searcher_num_candidates', type=int,
help='KDE: Number of candidates that are sampled to optimize the acquisition function',
**allow_list)
parser.add_argument('--searcher_bandwidth_factor', type=int,
help='KDE: Parameter to scale bandwidth',
**allow_list)
parser.add_argument('--searcher_random_fraction', type=float,
help='KDE: Fraction of configs suggested at random',
**allow_list)
# First pass: All global arguments
# Why do we parse all global args here, and not just benchmark_name?
# This is to make sure that the help option of the parser lists all
# global arguments and their help strings.
_params = parser.parse_known_args()[0]
benchmark_name = _params.benchmark_name
# Add benchmark-specific CL args (if any)
# These are the ones listed in benchmark['default_params'], minus args which
# are already global (i.e., added above)
_, default_params = benchmark_factory({'benchmark_name': benchmark_name})
help_str = f"Additional parameter for {benchmark_name} benchmark"
have_extra_args = False
for name, value in default_params.items():
try:
# We don't need to set defaults here
if value is None:
_type = str
else:
_type = type(value)
parser.add_argument('--' + name, type=_type, help=help_str)
have_extra_args = True
except argparse.ArgumentError:
pass
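    # Illustration (hypothetical default_params, for example only): an entry
    # like {'batch_size': 128} would add an int-typed --batch_size argument
    # here, whereas an entry with value None is exposed as a str-typed
    # argument.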
# Second pass: All args (global and benchmark-specific)
if have_extra_args:
params = vars(parser.parse_args())
    else:
        # `_params` is a Namespace; the post-processing below expects a dict
        params = vars(_params)
# Post-processing
params['debug_log'] = not params['no_debug_log']
del params['no_debug_log']
params['rotate_gpus'] = not params['no_gpu_rotation']
del params['no_gpu_rotation']
epochs = params.get('epochs')
if params.get('max_resource_level') is None:
if epochs is not None:
logger.info("--epochs is deprecated, please use "
"--max_resource_level in the future")
params['max_resource_level'] = epochs
elif epochs is not None:
logger.info("Both --max_resource_level and the deprecated "
"--epochs are set. The latter is ignored")
if 'epochs' in params:
del params['epochs']
params['rung_system_per_bracket'] = not params['no_rung_system_per_bracket']
del params['no_rung_system_per_bracket']
params['normalize_targets'] = not params['not_normalize_targets']
del params['not_normalize_targets']
params['searcher_use_new_code'] = not params['searcher_use_old_code']
del params['searcher_use_old_code']
return params
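

# A minimal sketch (not part of the original module) of how the list-valued
# params dict returned above could be expanded into one parameter dict per
# experiment via a Cartesian product. The name `expand_list_params` and the
# driver comments below are hypothetical, and --argument_groups is ignored
# for simplicity.
import itertools


def expand_list_params(params):
    """Yield one params dict per combination of list-valued entries."""
    list_keys = sorted(k for k, v in params.items() if isinstance(v, list))
    if not list_keys:
        yield dict(params)
        return
    for combination in itertools.product(*(params[k] for k in list_keys)):
        new_params = dict(params)
        new_params.update(zip(list_keys, combination))
        yield new_params

# Hypothetical usage, mirroring --skip_initial_experiments:
#   params = parse_args()
#   for i, experiment in enumerate(expand_list_params(params)):
#       if i < params['skip_initial_experiments']:
#           continue
#       launch_experiment(experiment)  # `launch_experiment` is hypothetical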