in dpr/data/qa_validation.py [0:0]
def has_answer(answers, text, tokenizer, match_type) -> bool:
    """Check whether a document contains any of the candidate answers.

    Args:
        answers: Iterable of candidate answers. Each one is treated as a
            literal string when ``match_type`` is ``"string"`` and as a
            regex pattern when it is ``"regex"``.
        text: Document text to search; it is passed through ``_normalize``
            before matching.
        tokenizer: Object whose ``tokenize(str)`` result exposes
            ``words(uncased=True)``. Only used for ``"string"`` matching.
        match_type: ``"string"`` for token-level matching, ``"regex"`` to
            search the whole normalized text with each answer as a pattern.

    Returns:
        True as soon as one answer matches. False if none match, and also
        when ``match_type`` is neither ``"string"`` nor ``"regex"``.
    """
    text = _normalize(text)

    if match_type == "string":
        # Compare token sequences rather than raw substrings, so an answer
        # only matches when it lines up with whole tokens of the document.
        doc_tokens = tokenizer.tokenize(text).words(uncased=True)
        doc_len = len(doc_tokens)

        for answer in answers:
            answer_tokens = tokenizer.tokenize(_normalize(answer)).words(uncased=True)
            span = len(answer_tokens)
            if span == 0:
                # An answer with no tokens would equal the empty slice at
                # offset 0 and report a hit for every document.
                continue
            # Slide a window of the answer's length over the document tokens.
            for start in range(doc_len - span + 1):
                if answer_tokens == doc_tokens[start : start + span]:
                    return True
        return False

    if match_type == "regex":
        # Each answer is a pattern searched against the normalized text.
        return any(regex_match(text, _normalize(pattern)) for pattern in answers)

    # Unknown match types never match (same as the original fall-through).
    return False