# docker_images/allennlp/app/pipelines/question_answering.py
import os
import shutil
from typing import Any, Dict

# Although this import looks unused, it is required: importing allennlp_models
# registers the models and downloads the data they need.
import allennlp_models  # noqa: F401
from allennlp.predictors.predictor import Predictor
from app.pipelines import Pipeline


class QuestionAnsweringPipeline(Pipeline):
def __init__(
self,
model_id: str,
):
try:
self.predictor = Predictor.from_path("hf://" + model_id)
except (IOError, OSError):
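            # Loading can fail when the NLTK data cached under NLTK_DATA is
            # corrupt or incomplete; remove the corpora directory so it gets
            # re-downloaded, then retry once.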
nltk = os.getenv("NLTK_DATA")
if nltk is None:
raise
directory = os.path.join(nltk, "corpora")
shutil.rmtree(directory)
self.predictor = Predictor.from_path("hf://" + model_id)

    def __call__(self, inputs: Dict[str, str]) -> Dict[str, Any]:
"""
Args:
inputs (:obj:`dict`):
a dictionary containing two keys, 'question' being the question being asked and 'context' being some text containing the answer.
Return:
A :obj:`dict`:. The object return should be like {"answer": "XXX", "start": 3, "end": 6, "score": 0.82} containing :
- "answer": the extracted answer from the `context`.
- "start": the offset within `context` leading to `answer`. context[start:stop] == answer
- "end": the ending offset within `context` leading to `answer`. context[start:stop] === answer
- "score": A score between 0 and 1 describing how confident the model is for this answer.
"""
        allennlp_input = {"passage": inputs["context"], "question": inputs["question"]}
        predictions = self.predictor.predict_json(allennlp_input)
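        # "best_span" holds the start/end token indices of the predicted
        # answer; "token_offsets" maps each token back to character offsets
        # in the passage.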
start_token_idx, end_token_idx = predictions["best_span"]
start = predictions["token_offsets"][start_token_idx][0]
end = predictions["token_offsets"][end_token_idx][1]
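        # Score the span as the product of the independent start- and
        # end-position probabilities.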
score = (
predictions["span_end_probs"][end_token_idx]
* predictions["span_start_probs"][start_token_idx]
)
return {
"answer": predictions["best_span_str"],
"start": start,
"end": end,
"score": score,
}
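

# A minimal usage sketch, assuming this module is importable as-is. The model
# id below is a hypothetical placeholder, not a real checkpoint; instantiating
# the pipeline downloads the model from the Hub.
if __name__ == "__main__":
    pipeline = QuestionAnsweringPipeline(model_id="some-user/some-allennlp-rc-model")
    result = pipeline(
        {
            "question": "Who walked the dog?",
            "context": "My name is Clara and I walked the dog this morning.",
        }
    )
    # Expected shape: {"answer": ..., "start": ..., "end": ..., "score": ...}
    print(result)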