in blink/candidate_generation.py [0:0]
def get_candidates(self, mention_data):
    """Search Solr for candidates matching ``mention_data``.

    The query template ``self.query`` is filled positionally with one value
    per entry of ``self.keys``. A key present in ``mention_data`` contributes
    its value (passed through ``BM45_Candidate_Generator.solr_escape`` unless
    ``self.raw_solr_fields`` is truthy); a missing key contributes
    ``utils.get_sent_context(mention_data, key)`` unchanged.

    Returns a list with ``self._filter_result(doc)`` for every returned doc
    that contains a ``"wikidata_id"`` entry. If the search raises, the
    failure is printed, ``self.c`` is incremented and ``[]`` is returned.
    """
    client = self.solr
    field_keys = self.keys
    template = self.query
    escape_values = not self.raw_solr_fields

    # Collect the positional arguments for the query template.
    format_args = []
    for key in field_keys:
        if key not in mention_data:
            # The context fallback is inserted as returned, never escaped.
            value = utils.get_sent_context(mention_data, key)
        elif escape_values:
            value = BM45_Candidate_Generator.solr_escape(mention_data[key])
        else:
            value = mention_data[key]
        format_args.append(value)
    query = template.format(*format_args)

    try:
        results = client.search(query, **self.query_arguments)
    except Exception as e:
        print("\nException:", type(e), "- line", e.__traceback__.tb_lineno)
        print(repr(e))
        failures_so_far = self.c
        # Only the first ten failures get the full dump of the request.
        if failures_so_far < 10:
            details = "Exception with: \naddress: {} \nquery: {} \nmention_data: {} \n"
            print(details.format(self.solr_address, query, str(mention_data)))
        self.c = failures_so_far + 1
        return []

    # Docs without a wikidata_id are a very small fraction of the dataset
    # and are treated as noise, so they are dropped here.
    candidates = []
    for doc in results.docs:
        if "wikidata_id" not in doc:
            continue
        candidates.append(self._filter_result(doc))
    return candidates