def getEnvWiseStats()

in data/question-gen/entropy_based_filtering.py [0:0]


def _questionStats(qns):
    """Count unique questions before/after pruning for a list of questions.

    Args:
        qns: list of question dicts; each is read via its 'question' and
            'accept' keys.

    Returns:
        A dict with keys 'before' (number of unique question strings),
        'after' (number of unique question strings among entries whose
        'accept' value is truthy) and 'drop_rate' (fraction of unique
        questions removed; 0. when there are no questions at all).
    """
    before = len({qn['question'] for qn in qns})
    after = len({qn['question'] for qn in qns if qn['accept']})
    # `1. *` forces float division even under Python 2 integer semantics;
    # the guard avoids dividing by zero for an empty group.
    drop_rate = (before - after) / (1. * before) if before else 0.
    return {
        'before': before,
        'after': after,
        'drop_rate': drop_rate
    }


def getEnvWiseStats(qns_dataset, templates):
    """Compute per-house question statistics before and after pruning.

    Args:
        qns_dataset: iterable of question dicts. Each entry is read via its
            'house', 'question', 'accept' and 'type' keys.
        templates: iterable of template identifiers. Each one is compared
            (with ==) against collapseType(qn['type']) and used as a key in
            the per-house result.

    Returns:
        A dict mapping each house id found in `qns_dataset` to a dict that
        holds a 'global' entry (stats over all of that house's questions)
        plus one entry per template (stats over the questions whose
        collapsed type equals that template). Every entry has the keys
        'before', 'after' and 'drop_rate'.
    """
    # Materialise once so a one-shot iterable is still available for every
    # house, not just the first one processed.
    templates = list(templates)

    # Bucket the questions by house in a single pass instead of rescanning
    # the whole dataset once per house.
    qns_by_house = {}
    for qn in qns_dataset:
        qns_by_house.setdefault(qn['house'], []).append(qn)

    env_wise_stats_json = {}

    print("Computing env-wise stats...")
    for house_id in tqdm(list(qns_by_house)):
        qns_for_house = qns_by_house[house_id]

        # total unique questions (across all templates) before and after pruning
        house_stats = {'global': _questionStats(qns_for_house)}

        # Collapse each question's type once per house rather than once per
        # (template, question) pair; skipped entirely when no templates are
        # requested.
        if templates:
            collapsed_types = [collapseType(qn['type']) for qn in qns_for_house]
        else:
            collapsed_types = []

        for template in templates:
            qns_for_template_for_house = [
                qn for qn, collapsed in zip(qns_for_house, collapsed_types)
                if collapsed == template
            ]
            house_stats[template] = _questionStats(qns_for_template_for_house)

        env_wise_stats_json[house_id] = house_stats

    return env_wise_stats_json