in blink/main_solr.py [0:0]
def main(parameters):
    """Run the pipeline end to end: read, detect mentions, link, rerank, show.

    The steps, in order: read sentences from the input file, run the NER
    model to obtain mentions, generate candidates for every mention
    (optionally fetching extra data per candidate), rerank, and present the
    annotated sentences. When an output folder is configured, intermediate
    and final results are also written there.

    Args:
        parameters: Mapping of run options. Keys read directly here are
            ``"path_to_input_file"``, ``"one_sentence_per_line"``,
            ``"output_folder_path"`` (``None`` disables writing to disk) and
            ``"consider_additional_datafetcher"``. The whole mapping is also
            handed to each component's ``get_model`` factory.

    Raises:
        ValueError: If the output folder already exists, is not empty, and
            the user does not answer exactly ``Y`` at the prompt.
    """
    print("Parameters:", parameters)

    # Read data
    sentences = utils.read_sentences_from_file(
        parameters["path_to_input_file"],
        one_sentence_per_line=parameters["one_sentence_per_line"],
    )

    # Identify mentions
    ner_model = NER.get_model(parameters)
    ner_output_data = ner_model.predict(sentences)
    # The NER output supplies its own sentence list; use it from here on.
    sentences = ner_output_data["sentences"]
    mentions = ner_output_data["mentions"]

    output_folder_path = parameters["output_folder_path"]

    # Never silently overwrite earlier results: ask before wiping a non-empty
    # output folder, and abort unless the user explicitly confirms.
    if (
        (output_folder_path is not None)
        and os.path.exists(output_folder_path)
        and os.listdir(output_folder_path)
    ):
        print(
            "The given output directory ({}) already exists and is not empty.".format(
                output_folder_path
            )
        )
        answer = input("Would you like to empty the existing directory? [Y/N]\n")
        if answer.strip() == "Y":
            print("Deleting {}...".format(output_folder_path))
            shutil.rmtree(output_folder_path)
            # rmtree removes the folder itself, and files are written into it
            # below. Recreate it here instead of relying on the path helpers
            # to do so (harmless if they already do).
            os.makedirs(output_folder_path, exist_ok=True)
        else:
            raise ValueError(
                "Output directory ({}) already exists and is not empty.".format(
                    output_folder_path
                )
            )

    if output_folder_path is not None:
        utils.write_dicts_as_json_per_line(
            sentences, utils.get_sentences_txt_file_path(output_folder_path)
        )
        utils.write_dicts_as_json_per_line(
            mentions, utils.get_mentions_txt_file_path(output_folder_path)
        )

    # Generate candidates and get the data that describes the candidates
    candidate_generator = CG.get_model(parameters)
    candidate_generator.process_mentions_for_candidate_generator(
        sentences=sentences, mentions=mentions
    )

    # Built lazily and reused: one data fetcher serves every mention instead
    # of constructing a new one per mention. It is still never created when
    # the option is off or there are no mentions.
    data_fetcher = None
    for mention in mentions:
        mention["candidates"] = candidate_generator.get_candidates(mention)
        if parameters["consider_additional_datafetcher"]:
            if data_fetcher is None:
                data_fetcher = CDF.get_model(parameters)
            for candidate in mention["candidates"]:
                data_fetcher.get_data_for_entity(candidate)

    # Rewrite the mentions file now that each mention carries its candidates.
    if output_folder_path is not None:
        utils.write_dicts_as_json_per_line(
            mentions, utils.get_mentions_txt_file_path(output_folder_path)
        )

    # Reranking
    reranking_model = R.get_model(parameters)
    reranking_model.rerank(mentions, sentences)

    # Final outputs: mentions written once more after reranking, plus the
    # pickle and the pretty-printed end-to-end results.
    if output_folder_path is not None:
        utils.write_dicts_as_json_per_line(
            mentions, utils.get_mentions_txt_file_path(output_folder_path)
        )
        utils.write_end2end_pickle_output(sentences, mentions, output_folder_path)
        utils.present_annotated_sentences(
            sentences,
            mentions,
            utils.get_end2end_pretty_output_file_path(output_folder_path),
        )

    # Showcase results
    utils.present_annotated_sentences(sentences, mentions)