in data/question-gen/entropy_based_filtering.py [0:0]
def getEnvWiseStats(qns_dataset, templates):
    """Compute per-house question statistics before and after pruning.

    Each record in `qns_dataset` is read through the keys 'house',
    'question', 'type' and 'accept'. A question counts towards 'after'
    when its 'accept' value is truthy.

    Args:
        qns_dataset: iterable of question records (dict-like).
        templates: iterable of template identifiers; a question belongs to
            a template when `collapseType(qn['type']) == template`.

    Returns:
        A dict mapping each house id to a dict with a 'global' entry and
        one entry per template. Every entry has the keys 'before' and
        'after' (number of unique question strings) and 'drop_rate'
        (fraction of unique questions removed, 0. for an empty group).
    """
    # Bucket the questions by house in a single pass instead of re-scanning
    # the whole dataset once per house.
    qns_by_house = {}
    for qn in qns_dataset:
        qns_by_house.setdefault(qn['house'], []).append(qn)

    env_wise_stats_json = {}
    print ("Computing env-wise stats...")
    for house_id, qns_for_house in tqdm(list(qns_by_house.items())):
        # total unique questions (across all templates) before and after pruning
        house_stats = {'global': _uniqueQuestionStats(qns_for_house)}

        # The collapsed type depends only on the question, so compute it
        # once per question rather than once per (template, question) pair.
        collapsed_types = [collapseType(qn['type']) for qn in qns_for_house]
        for template in templates:
            qns_for_template_for_house = [
                qn for qn, collapsed in zip(qns_for_house, collapsed_types)
                if collapsed == template
            ]
            house_stats[template] = _uniqueQuestionStats(qns_for_template_for_house)

        env_wise_stats_json[house_id] = house_stats
    return env_wise_stats_json


def _uniqueQuestionStats(qns):
    """Return unique-question counts before/after pruning and the drop rate."""
    before = len(set(qn['question'] for qn in qns))
    after = len(set(qn['question'] for qn in qns if qn['accept']))
    # A group with no questions (e.g. a template unused in this house) has
    # nothing to drop; report 0. rather than dividing by zero.
    if before:
        drop_rate = (before - after) / float(before)
    else:
        drop_rate = 0.
    return {
        'before': before,
        'after': after,
        'drop_rate': drop_rate
    }