in E2E_TOD/eval.py [0:0]
def _evaluateGeneratedDialogue(self, dialog, goal, real_requestables, counts,
                               soft_acc=False, same_eval_as_cambridge=False):
    """Score one generated dialogue for entity match and request success.

    Every turn except the first is scanned for delexicalised placeholders in
    ``turn['resp_gen']``.  A response containing ``[value_name]`` or
    ``[value_id]`` counts as offering an entity; for database-backed domains
    the offered entities are the DB results for the turn's belief state.
    A response containing ``[value_<slot>]`` counts as providing that
    requestable slot (``reference`` additionally needs ``'booked'`` or
    ``'ok'`` in ``turn['pointer']``).

    Args:
        dialog: Sequence of turn dicts.  Element 0 is skipped.  Keys read:
            ``resp_gen``; ``bspn`` or ``bspn_gen``; ``pointer``; and, when
            ``same_eval_as_cambridge`` is set, ``dspn`` or ``dspn_gen``.
        goal: Maps each goal domain to a dict with ``'informable'`` and
            ``'requestable'`` entries.
        real_requestables: Maps each goal domain to the slots the user
            actually requested.
        counts: Mapping holding ``'<slot>_total'`` and ``'<slot>_offer'``
            counters; updated in place and returned.
        soft_acc: If true, return the matched / successful fraction instead
            of an all-or-nothing 0/1 score.
        same_eval_as_cambridge: If true, a turn only counts for a domain
            when that domain appears in the turn's domain span.

    Returns:
        ``(success, match, stats, counts)`` where ``stats`` maps each of the
        seven known domains to ``[matched, succeeded, in_goal]`` flags.

    Note:
        An empty goal is vacuously matched and successful.  The soft mode
        previously raised ``ZeroDivisionError`` for it; it now returns 1.0,
        in line with the hard mode.
    """
    # Domains whose offered entity is checked against the database.
    db_domains = ('restaurant', 'hotel', 'attraction', 'train')
    # Domains for which no concrete entity has to be offered.
    entity_free_domains = ('taxi', 'police', 'hospital')
    # Key order kept as in the original hard-coded stats literal.
    stat_domains = ('restaurant', 'hotel', 'attraction', 'train',
                    'taxi', 'hospital', 'police')
    name_placeholder = '[value_name]'

    requestables = self.requestables
    domains_in_goal = list(goal.keys())
    venue_offered = {domain: [] for domain in domains_in_goal}
    # Only membership is ever tested, so a set per domain is sufficient.
    provided_requestables = {domain: set() for domain in domains_in_goal}

    for t, turn in enumerate(dialog):
        if t == 0:
            # NOTE(review): presumably dialog[0] is not a real system turn
            # (header / bookkeeping row) -- confirm against the caller.
            continue
        sent_t = turn['resp_gen']
        mentions_entity = '[value_name]' in sent_t or '[value_id]' in sent_t

        # Requestable placeholders present in this response.  The result does
        # not depend on the domain, so it is computed once per turn.
        # ATTENTION: assumes phone, address etc. are not provided twice.
        offered_slots = set()
        for requestable in requestables:
            if requestable == 'reference':
                if '[value_reference]' in sent_t:
                    pointer = turn['pointer']
                    # A reference only counts if the pointer allowed it.
                    if 'booked' in pointer or 'ok' in pointer:
                        offered_slots.add('reference')
            elif '[value_' + requestable + ']' in sent_t:
                offered_slots.add(requestable)

        # Domains active in this turn (tokens look like "[restaurant]");
        # None means "do not filter by domain".
        predicted_domains = None
        if same_eval_as_cambridge:
            if self.cfg.use_true_domain_for_ctr_eval:
                dspn = turn['dspn']
            else:
                dspn = turn['dspn_gen']
            predicted_domains = {d[1:-1] for d in dspn.split()}

        for domain in domains_in_goal:
            if predicted_domains is not None and domain not in predicted_domains:
                continue

            if mentions_entity:
                if domain in db_domains:
                    # Use the gold belief state only when the config asks for it.
                    if self.cfg.use_true_curr_bspn or self.cfg.use_true_bspn_for_ctr_eval:
                        bspn = turn['bspn']
                    else:
                        bspn = turn['bspn_gen']
                    constraint_dict = self.reader.bspan_to_constraint_dict(bspn)
                    domain_constraints = constraint_dict.get(domain)
                    if domain_constraints:
                        venues = self.reader.db.queryJsons(
                            domain, domain_constraints, return_name=True)
                    else:
                        venues = []

                    # Replace the offer when nothing was offered so far, or
                    # when the new result set contains a venue not yet offered.
                    # An empty result never overwrites an earlier offer.
                    previous = venue_offered[domain]
                    if venues and (not previous
                                   or not set(venues).issubset(previous)):
                        venue_offered[domain] = venues
                else:
                    # Not DB-limited, so any name counts as an offer.
                    venue_offered[domain] = name_placeholder

            provided_requestables[domain] |= offered_slots

    for domain in domains_in_goal:
        if 'name' in goal[domain]['informable'] or domain in entity_free_domains:
            # The user supplied the name, or the domain needs no entity:
            # the match is granted automatically.
            venue_offered[domain] = name_placeholder
        elif domain == 'train':
            # A train offer is only required when the user asked for its id.
            if not venue_offered[domain] and 'id' not in goal[domain]['requestable']:
                venue_offered[domain] = name_placeholder

    # Given all inform and requestable slots, go through each domain of the
    # user goal and check whether the right entity was provided and all
    # requestable slots were given to the user.  The dialogue is successful
    # if that is the case for all domains.
    stats = {domain: [0, 0, 0] for domain in stat_domains}

    # MATCH
    matched = 0
    for domain in domains_in_goal:
        offered = venue_offered[domain]
        if domain in db_domains:
            goal_venues = self.reader.db.queryJsons(
                domain, goal[domain]['informable'], return_name=True)
            if isinstance(offered, str) and '_name' in offered:
                hit = True
            else:
                hit = bool(offered) and bool(set(offered) & set(goal_venues))
        else:
            hit = '_name]' in offered
        matched += int(hit)
        stats[domain][0] = int(hit)
        stats[domain][2] = 1

    if soft_acc:
        match = float(matched) / len(domains_in_goal) if domains_in_goal else 1.0
    else:
        match = 1.0 if matched == len(domains_in_goal) else 0.0

    # Per-slot offer statistics are collected regardless of the match result.
    for domain in domains_in_goal:
        for request in real_requestables[domain]:
            counts[request + '_total'] += 1
            if request in provided_requestables[domain]:
                counts[request + '_offer'] += 1

    # SUCCESS -- only evaluated when every goal domain was matched.
    success = 0
    if match == 1.0:
        for domain in domains_in_goal:
            provided = provided_requestables[domain]
            # A domain succeeds when every requested slot was provided;
            # a domain without requests succeeds trivially.
            domain_ok = all(request in provided
                            for request in real_requestables[domain])
            success += int(domain_ok)
            stats[domain][1] = int(domain_ok)

        # final eval
        if soft_acc:
            if real_requestables:
                success = float(success) / len(real_requestables)
            else:
                success = 1.0
        else:
            success = 1 if success >= len(real_requestables) else 0

    return success, match, stats, counts