in calculate_pr.py [0:0]
def _spans_to_bio(context, spans):
    """Turn character-offset spans into BIO labels over whitespace tokens.

    Args:
        context: The sentence the spans refer to.
        spans: Iterable of dicts with keys ``'slot'`` (label name), ``'ans'``
            (span text) and ``'start'`` (character offset of the span in
            ``context``).

    Returns:
        A list of labels: ``'O'`` for every whitespace token outside a span,
        ``'B-<slot>'`` for the first token of a span and ``'I-<slot>'`` for
        each remaining token of that span.
    """
    labels = []
    cursor = 0
    # Spans are processed left to right; sorted() is stable, so spans sharing
    # a start offset keep their input order.
    for span in sorted(spans, key=lambda s: s['start']):
        # Every token between the previous span and this one is outside.
        labels.extend(['O'] * len(context[cursor:span['start']].split()))
        labels.append('B-' + span['slot'])
        # One 'I-' per token after the first; a non-positive count adds none.
        labels.extend(['I-' + span['slot']] * (len(span['ans'].split()) - 1))
        # +1 steps over the separator that follows the span.
        cursor = span['start'] + len(span['ans']) + 1
    labels.extend(['O'] * len(context[cursor:].split()))
    return labels


def qanlu_rec_to_conll(rec, predictions_):
    """Convert one QA-style record and its predictions to BIO label sequences.

    Questions whose id starts with ``"intent_"`` are ignored. For every other
    question, the first gold answer (if any) becomes a gold span and a
    non-empty predicted answer becomes a predicted span located with
    ``context.find``.

    Args:
        rec: Dict with ``'context'`` (str) and ``'qas'``, a list of dicts
            carrying ``'id'``, ``'answers'`` (list of dicts with ``'text'``
            and ``'answer_start'``) and, for non-intent questions, ``'slot'``.
        predictions_: Mapping from question id to predicted answer text. It is
            indexed for every question in ``rec['qas']``, including intent
            questions.

    Returns:
        A tuple ``(y_true, y_pred, tokens)`` where ``tokens`` is
        ``context.split()`` and the other two are BIO label lists.

    Raises:
        KeyError: If a question id is missing from ``predictions_``.
    """
    context = rec['context']
    gold_spans = []
    pred_spans = []
    for qa in rec['qas']:
        # Looked up before the intent check so a missing id still raises
        # KeyError for intent questions as well.
        pred_ans = predictions_[qa['id']]
        if qa['id'].startswith("intent_"):
            continue
        if qa['answers']:
            first_answer = qa['answers'][0]
            gold_spans.append({
                'slot': qa['slot'],
                'ans': first_answer['text'],
                'start': first_answer['answer_start'],
            })
        if pred_ans:
            start = context.find(pred_ans)
            # str.find returns -1 when the prediction is not a substring of
            # the context. Using -1 as an offset would emit extra labels and
            # misalign y_pred with the tokens, so such predictions are dropped.
            if start >= 0:
                pred_spans.append(
                    {'slot': qa['slot'], 'ans': pred_ans, 'start': start})
    y_true = _spans_to_bio(context, gold_spans)
    y_pred = _spans_to_bio(context, pred_spans)
    return y_true, y_pred, context.split()