def evaluate()

in blink/candidate_ranking/evaluate.py [0:0]


# Imports needed by this function (standard library plus torch data utilities).
# `utils` and `evaluate_model_on_dataset` are package-internal helpers defined
# elsewhere in blink/candidate_ranking; their import lines are not shown here.
import os
import time

from torch.utils.data import DataLoader, SequentialSampler


def evaluate(parameters, logger=None):
    reranker = utils.get_reranker(parameters)

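    # Full evaluation covers all benchmark datasets; otherwise only aida-B is used.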
    if parameters["full_evaluation"]:
        eval_datasets = [
            "aida-A",
            "aida-B",
            "msnbc",
            "aquaint",
            "ace2004",
            "clueweb",
            "wikipedia",
        ]
    else:
        eval_datasets = ["aida-B"]

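    # Read candidate and gold-position fields either from the pregenerated-candidate
    # keys or from the default keys, depending on the configuration.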
    candidates_key = (
        "pregenerated_candidates"
        if parameters["evaluate_with_pregenerated_candidates"]
        else "candidates"
    )
    gold_key = (
        "pregenerated_gold_pos"
        if parameters["evaluate_with_pregenerated_candidates"]
        else "gold_pos"
    )

    number_of_samples_per_dataset = {}
    total_time = 0

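    # Evaluate each dataset in turn, tracking sample counts and wall-clock time.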
    for eval_dataset_name in eval_datasets:
        time_start = time.time()
        logger.info("\nEvaluating on the {} dataset".format(eval_dataset_name))
        eval_samples = utils.read_dataset(
            eval_dataset_name, parameters["path_to_preprocessed_json_data"]
        )
        eval_samples_filtered = utils.filter_samples(
            eval_samples, parameters["top_k"], gold_key
        )
        if logger is not None:
            logger.info(
                "Retained {} out of {} samples".format(
                    len(eval_samples_filtered), len(eval_samples)
                )
            )
        number_of_samples_per_dataset[eval_dataset_name] = len(eval_samples)

        # if args.num_preprocessing_threads == -1:
        #     eval_data, eval_tensor_data = process_samples_for_model(args.context_key, eval_samples_filtered, tokenizer, args.max_seq_length, logger = logger, top_k = args.top_k, example = False, debug = args.debug, tagged = args.tag_mention, candidates_key = candidates_key, gold_key = gold_key)
        # else:
        #     eval_data, eval_tensor_data = preprocessing_multithreaded(eval_samples_filtered, logger, args, output_dir=True)

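        # Encode the filtered mentions and their top-k candidates into the tensor
        # dataset the reranker expects.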
        eval_data, eval_tensor_data = reranker._process_mentions_for_model(
            parameters["context_key"],
            eval_samples_filtered,
            reranker.tokenizer,
            parameters["max_seq_length"],
            parameters["top_k"],
            parameters["silent"],
            candidates_key=candidates_key,
            gold_key=gold_key,
            debug=parameters["debug"],
        )

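        # Evaluate sequentially (no shuffling) in fixed-size batches.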
        eval_sampler = SequentialSampler(eval_tensor_data)
        eval_dataloader = DataLoader(
            eval_tensor_data,
            sampler=eval_sampler,
            batch_size=parameters["evaluation_batch_size"],
        )

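        # Results go to the user-specified file, or to eval_results.txt next to the model.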
        if parameters["output_eval_file"] is None:
            output_eval_file = os.path.join(
                parameters["path_to_model"], "eval_results.txt"
            )
        else:
            output_eval_file = parameters["output_eval_file"]

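        # Score the reranker on this dataset and write the results to the output file.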
        result = evaluate_model_on_dataset(
            reranker.model,
            eval_dataloader,
            eval_dataset_name,
            eval_bm45_acc=True,
            device=reranker.device,
            logger=logger,
            path_to_file_to_write_results=output_eval_file,
            number_of_samples=number_of_samples_per_dataset[eval_dataset_name],
        )

        execution_time = (time.time() - time_start) / 60
        total_time += execution_time
        if logger is not None:
            logger.info(
                "The execution for dataset {} took {} minutes".format(
                    eval_dataset_name, execution_time
                )
            )
        else:
            print(
                "The execution for dataset {} took {} minutes".format(
                    eval_dataset_name, execution_time
                )
            )

    if logger is not None:
        logger.info(
            "The complete evaluation took {} minutes".format(total_time)
        )
    else:
        print("The complete evaluation took {} minutes".format(total_time))