in drqa/reader/vector.py [0:0]
def vectorize(ex, model, single_answer=False):
    """Torchify a single example.

    Args:
        ex: Mapping describing one example. 'document' and 'question' hold
            token sequences and 'id' is passed through unchanged. Depending
            on which feature flags are enabled, 'qlemma', 'lemma', 'pos' and
            'ner' are also read. 'answers', when present, is a sequence
            whose items are indexed as (start, end).
        model: Object exposing `args` (feature flags), `word_dict`
            (token -> index lookup) and `feature_dict` (feature name ->
            column index).
        single_answer: If True, only the first answer is used and its start
            and end are returned as one-element LongTensors. Otherwise
            plain lists with every answer's start and end are returned.

    Returns:
        `(document, features, question, ex['id'])` when `ex` has no
        'answers' key, otherwise
        `(document, features, question, start, end, ex['id'])`.
        `features` is None when `feature_dict` is empty; otherwise it is a
        zero-initialised tensor with one row per document token and one
        column per entry in `feature_dict`.

    Raises:
        AssertionError: If `single_answer` is True and `ex['answers']` is
            empty.
    """
    args = model.args
    word_dict = model.word_dict
    feature_dict = model.feature_dict

    # Index words
    doc_tokens = ex['document']
    document = torch.LongTensor([word_dict[w] for w in doc_tokens])
    question_tokens = ex['question']
    question = torch.LongTensor([word_dict[w] for w in question_tokens])

    # Create extra features vector
    if len(feature_dict) > 0:
        features = torch.zeros(len(doc_tokens), len(feature_dict))
    else:
        features = None

    # f_{exact_match}
    if args.use_in_question:
        q_words_cased = set(question_tokens)
        q_words_uncased = {w.lower() for w in question_tokens}
        q_lemma = set(ex['qlemma']) if args.use_lemma else None
        for i, token in enumerate(doc_tokens):
            # Column indices are looked up only on a match, so a feature
            # name missing from feature_dict only matters when it fires.
            if token in q_words_cased:
                features[i, feature_dict['in_question']] = 1.0
            if token.lower() in q_words_uncased:
                features[i, feature_dict['in_question_uncased']] = 1.0
            if q_lemma and ex['lemma'][i] in q_lemma:
                features[i, feature_dict['in_question_lemma']] = 1.0

    # f_{token} (POS)
    if args.use_pos:
        for i, tag in enumerate(ex['pos']):
            name = 'pos=%s' % tag
            # Tags without a column in feature_dict are silently skipped.
            if name in feature_dict:
                features[i, feature_dict[name]] = 1.0

    # f_{token} (NER)
    if args.use_ner:
        for i, tag in enumerate(ex['ner']):
            name = 'ner=%s' % tag
            if name in feature_dict:
                features[i, feature_dict[name]] = 1.0

    # f_{token} (TF): case-insensitive count normalised by document length
    if args.use_tf:
        lowered = [w.lower() for w in doc_tokens]
        counter = Counter(lowered)
        doc_len = len(doc_tokens)
        for i, w in enumerate(lowered):
            features[i, feature_dict['tf']] = counter[w] * 1.0 / doc_len

    # Maybe return without target
    if 'answers' not in ex:
        return document, features, question, ex['id']

    # ...or with target(s) (might still be empty if answers is empty)
    answers = ex['answers']
    if single_answer:
        # Raised explicitly rather than via `assert` so the check is not
        # stripped under `python -O`; AssertionError is kept so callers see
        # the same exception type as before.
        if len(answers) == 0:
            raise AssertionError(
                "single_answer=True requires at least one entry in "
                "ex['answers']"
            )
        start = torch.LongTensor(1).fill_(answers[0][0])
        end = torch.LongTensor(1).fill_(answers[0][1])
    else:
        start = [a[0] for a in answers]
        end = [a[1] for a in answers]

    return document, features, question, start, end, ex['id']