in blink/candidate_ranking/evaluate.py [0:0]
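# NOTE: this function assumes the module-level imports of this file are in scope
# (e.g. time, os, torch.utils.data.DataLoader / SequentialSampler, the utils
# helpers, and evaluate_model_on_dataset defined elsewhere in this module).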
def evaluate(parameters, logger=None):
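    """Evaluate a candidate-ranking model on one or more entity-linking benchmarks.

    `parameters` is the configuration dict whose keys are read throughout this
    function; `logger` is optional, and timing messages fall back to print()
    when it is not provided.
    """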
    reranker = utils.get_reranker(parameters)

    if parameters["full_evaluation"]:
        eval_datasets = [
            "aida-A",
            "aida-B",
            "msnbc",
            "aquaint",
            "ace2004",
            "clueweb",
            "wikipedia",
        ]
    else:
        eval_datasets = ["aida-B"]
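    # Choose which sample fields to read: the pregenerated candidate lists and
    # gold positions, or the plain "candidates" / "gold_pos" fields.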
    candidates_key = (
        "pregenerated_candidates"
        if parameters["evaluate_with_pregenerated_candidates"]
        else "candidates"
    )
    gold_key = (
        "pregenerated_gold_pos"
        if parameters["evaluate_with_pregenerated_candidates"]
        else "gold_pos"
    )
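    # Evaluate each benchmark in turn, keeping per-dataset sample counts and
    # accumulating the total wall-clock time.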
    number_of_samples_per_dataset = {}
    total_time = 0

    for eval_dataset_name in eval_datasets:
        time_start = time.time()

        if logger is not None:
            logger.info("\nEvaluating on the {} dataset".format(eval_dataset_name))

        eval_samples = utils.read_dataset(
            eval_dataset_name, parameters["path_to_preprocessed_json_data"]
        )
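        # Keep only mentions whose gold entity appears within the top_k candidates
        # (see utils.filter_samples); the "Retained ..." message below reports how
        # many samples survive.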
        eval_samples_filtered = utils.filter_samples(
            eval_samples, parameters["top_k"], gold_key
        )

        if logger is not None:
            logger.info(
                "Retained {} out of {} samples".format(
                    len(eval_samples_filtered), len(eval_samples)
                )
            )

        number_of_samples_per_dataset[eval_dataset_name] = len(eval_samples)
        # if args.num_preprocessing_threads == -1:
        #     eval_data, eval_tensor_data = process_samples_for_model(args.context_key, eval_samples_filtered, tokenizer, args.max_seq_length, logger = logger, top_k = args.top_k, example = False, debug = args.debug, tagged = args.tag_mention, candidates_key = candidates_key, gold_key = gold_key)
        # else:
        #     eval_data, eval_tensor_data = preprocessing_multithreaded(eval_samples_filtered, logger, args, output_dir=True)
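        # Tokenize the filtered mentions and their candidates into the tensors
        # expected by the reranker model.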
        eval_data, eval_tensor_data = reranker._process_mentions_for_model(
            parameters["context_key"],
            eval_samples_filtered,
            reranker.tokenizer,
            parameters["max_seq_length"],
            parameters["top_k"],
            parameters["silent"],
            candidates_key=candidates_key,
            gold_key=gold_key,
            debug=parameters["debug"],
        )
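        # Evaluation is deterministic: iterate over the dataset in order with a
        # sequential sampler.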
        eval_sampler = SequentialSampler(eval_tensor_data)
        eval_dataloader = DataLoader(
            eval_tensor_data,
            sampler=eval_sampler,
            batch_size=parameters["evaluation_batch_size"],
        )
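        # Results are written to an explicit output file if given, otherwise to
        # eval_results.txt in the model directory.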
        if parameters["output_eval_file"] is None:
            output_eval_file = os.path.join(
                parameters["path_to_model"], "eval_results.txt"
            )
        else:
            output_eval_file = parameters["output_eval_file"]
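        # Score every batch with the reranker and record the results for this
        # dataset.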
        result = evaluate_model_on_dataset(
            reranker.model,
            eval_dataloader,
            eval_dataset_name,
            eval_bm45_acc=True,
            device=reranker.device,
            logger=logger,
            path_to_file_to_write_results=output_eval_file,
            number_of_samples=number_of_samples_per_dataset[eval_dataset_name],
        )
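        # Track per-dataset wall-clock time in minutes.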
        execution_time = (time.time() - time_start) / 60
        total_time += execution_time

        if logger is not None:
            logger.info(
                "The execution for dataset {} took {} minutes".format(
                    eval_dataset_name, execution_time
                )
            )
        else:
            print(
                "The execution for dataset {} took {} minutes".format(
                    eval_dataset_name, execution_time
                )
            )
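    # After all datasets are processed, report the total evaluation time.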
    if logger is not None:
        logger.info("The evaluation took: {} minutes".format(total_time))
    else:
        print("The evaluation took:", total_time, " minutes")