def find_best_parametrization()

in access/fairseq/main.py [0:0]


def find_best_parametrization(exp_dir, metrics_coefs, preprocessors_kwargs, parametrization_budget=64):
    """Search for the preprocessors' kwargs that give the best combined validation score.

    Every preprocessor described in ``preprocessors_kwargs`` is instantiated and asked for
    its nevergrad variables (``get_nevergrad_variables()``). Those variables, together with
    the remaining constant kwargs, are flattened into ``<preprocessor name>_<kwarg name>``
    keyword arguments and handed to a ``ScrHammersleySearch`` optimizer. Each candidate is
    scored by building a simplifier from ``exp_dir``, evaluating it on the TurkCorpus
    ``'valid'`` phase and combining BLEU, SARI and FKGL with ``metrics_coefs``.

    Args:
        exp_dir: Experiment directory forwarded to ``get_simplifier``.
        metrics_coefs: Coefficients forwarded to ``combine_metrics`` together with the
            BLEU, SARI and FKGL scores.
        preprocessors_kwargs: Mapping ``preprocessor name -> kwargs dict``. Preprocessor
            names must not contain ``'_'`` because it is used as the separator when
            flattening the kwargs.
        parametrization_budget: Upper bound on the optimizer budget. The effective budget
            is ``min(32 ** dimension, parametrization_budget)``.

    Returns:
        A ``preprocessor name -> kwargs dict`` mapping built from the optimizer's
        recommendation. When there is nothing to optimize (no kwargs at all, or an
        instrumentation of dimension 0), ``preprocessors_kwargs`` is returned unchanged.

    Raises:
        ValueError: If a flattened key contains no ``'_'`` separator.
    """
    # NOTE(review): lru_cache requires every kwarg value to be hashable — confirm that
    # constant kwargs are never lists/dicts.
    @lru_cache()
    def evaluate_parametrization(**instru_kwargs):
        # Note that we use default generate kwargs instead of provided one because they are faster
        candidate_preprocessors_kwargs = instru_kwargs_to_preprocessors_kwargs(instru_kwargs)
        simplifier = get_simplifier(
            exp_dir, preprocessors_kwargs=candidate_preprocessors_kwargs, generate_kwargs={}
        )
        scores = evaluate_simplifier_on_turkcorpus(simplifier, phase='valid')
        return combine_metrics(scores['BLEU'], scores['SARI'], scores['FKGL'], metrics_coefs)

    def preprocessors_kwargs_to_instru_kwargs(preprocessors_kwargs):
        """Flatten ``{name: {kwarg: value}}`` into ``{'name_kwarg': value_or_variable}``."""
        instru_kwargs = {}
        for preprocessor_name, preprocessor_kwargs in preprocessors_kwargs.items():
            # The first '_' of a flattened key separates the preprocessor name from the
            # kwarg name, so the name itself must not contain one.
            assert '_' not in preprocessor_name, (
                f'Preprocessor name must not contain "_": {preprocessor_name!r}'
            )
            preprocessor = get_preprocessor_by_name(preprocessor_name)(**preprocessor_kwargs)
            # Constant values come first; nevergrad variables overwrite them when a
            # kwarg is tunable.
            merged_kwargs = {**preprocessor_kwargs, **preprocessor.get_nevergrad_variables()}
            for kwarg_name, kwarg_value in merged_kwargs.items():
                instru_kwargs[f'{preprocessor_name}_{kwarg_name}'] = kwarg_value
        return instru_kwargs

    def instru_kwargs_to_preprocessors_kwargs(instru_kwargs):
        """Inverse of ``preprocessors_kwargs_to_instru_kwargs``: regroup kwargs by preprocessor."""
        preprocessors_kwargs = defaultdict(dict)
        for key, value in instru_kwargs.items():
            # Split on the first '_' only: this is the exact inverse of the flattening
            # above and, unlike a character-class regex, does not truncate kwarg names
            # containing characters outside [a-z0-9_].
            preprocessor_name, separator, kwarg_name = key.partition('_')
            if not separator:
                raise ValueError(f'Cannot split {key!r} into a preprocessor name and a kwarg name')
            preprocessors_kwargs[preprocessor_name][kwarg_name] = value
        return dict(preprocessors_kwargs)

    instru_kwargs = preprocessors_kwargs_to_instru_kwargs(preprocessors_kwargs)
    if not instru_kwargs:
        # Nothing to instrument at all, hence nothing to optimize.
        return preprocessors_kwargs
    instru = Instrumentation(**instru_kwargs)
    if instru.dimension == 0:
        # Only constant kwargs: there is no search space.
        return preprocessors_kwargs
    # No need to search a lot when there are only a few parameters
    parametrization_budget = min(32**instru.dimension, parametrization_budget)
    optimizer = optimizerlib.ScrHammersleySearch(instrumentation=instru, budget=parametrization_budget, num_workers=1)
    recommendation = optimizer.optimize(evaluate_parametrization, verbosity=0)
    return instru_kwargs_to_preprocessors_kwargs(recommendation.kwargs)