in drqa/reader/predictor.py [0:0]
def __init__(self, model=None, tokenizer=None, normalize=True,
             embedding_file=None, num_workers=None):
    """Load the reader model and set up tokenization (optionally in parallel).

    Args:
        model: path to saved model file.
        tokenizer: optional string selecting the tokenizer class.
        normalize: squash output score to 0-1 probabilities with a softmax.
        embedding_file: if provided, expand the model dictionary to cover
            every pretrained vector available in this file.
        num_workers: number of CPU processes used to preprocess batches.
            None lets the pool pick a default; 0 or negative disables the
            pool and tokenization runs in-process.
    """
    logger.info('Initializing model...')
    self.model = DocReader.load(model or DEFAULTS['model'],
                                normalize=normalize)

    # Optionally grow the vocabulary so all pretrained vectors are usable.
    if embedding_file:
        logger.info('Expanding dictionary...')
        vocab = utils.index_embedding_words(embedding_file)
        new_words = self.model.expand_dictionary(vocab)
        self.model.load_embeddings(new_words, embedding_file)

    logger.info('Initializing tokenizer...')
    annotators = tokenizers.get_annotators_for_model(self.model)
    tokenizer_class = (tokenizers.get_class(tokenizer) if tokenizer
                       else DEFAULTS['tokenizer'])

    # A non-positive worker count explicitly disables the process pool.
    if num_workers is not None and num_workers <= 0:
        self.workers = None
    else:
        self.workers = ProcessPool(
            num_workers,
            initializer=init,
            initargs=(tokenizer_class, annotators),
        )
    self.tokenizer = tokenizer_class(annotators=annotators)