def get_struct_outputs_per_dataset()

in notebooks/utils.py


import numpy as np
from tqdm import tqdm

# get_epic_marginalize_late_fuse is assumed to be defined earlier in
# notebooks/utils.py.


def get_struct_outputs_per_dataset(run_infos,
                                   weights,
                                   dataset_key_suffix,
                                   uid_key='uid',
                                   eventual_fname='seen.json',
                                   normalize_before_combine=None):
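    """Build a structured, per-uid predictions dict for one dataset.

    Late-fuses the given runs with `weights`, then packages per-class verb
    and noun scores plus the top-100 action scores (with version/challenge
    metadata) into a single JSON-serializable dict.
    """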
    _, combined, dataset = get_epic_marginalize_late_fuse(
        run_infos,
        weights,
        dataset_key_suffix=dataset_key_suffix,
        uid_key=uid_key,
        eventual_fname=eventual_fname,
        normalize_before_combine=normalize_before_combine)
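    # combined[0], combined[1] and combined[2] map each uid to per-class
    # score arrays for verbs, nouns and actions, respectively.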
    results = {}
    # The following assertion may not hold: if a run_info points at an
    # actual JSON of results, it can contain more rows than the dataset.
    # assert len(combined[0]) == len(dataset)
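    # Invert the (verb, noun) -> action id mapping, so action ids can be
    # reported back as 'verb,noun' keys.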
    action_to_verb_noun = {
        val: key
        for key, val in dataset.verb_noun_to_action.items()
    }
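    # For every uid, expose verb/noun scores keyed by class index, and only
    # the 100 best-scoring actions keyed by 'verb,noun'.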
    for uid in tqdm(combined[0].keys(), desc='Computing res'):
        verb_res = {f'{j}': val for j, val in enumerate(combined[0][uid])}
        noun_res = {f'{j}': val for j, val in enumerate(combined[1][uid])}
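        # argpartition places the 100 largest scores in the last 100 slots
        # (unordered); sort those indices by descending score.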
        top_100_actions = sorted(np.argpartition(combined[2][uid],
                                                 -100)[-100:],
                                 key=lambda x: -combined[2][uid][x])
        action_res = {
            ','.join(str(el) for el in action_to_verb_noun[j]):
            combined[2][uid][j]
            for j in top_100_actions
        }
        results[f'{uid}'] = {
            'verb': verb_res,
            'noun': noun_res,
            'action': action_res,
        }
    # Add the discarded segments back in with flat (all-zero) scores, so
    # that every uid appears in the output.
    if dataset.discarded_df is not None:
        for _, row in dataset.discarded_df.iterrows():
            if str(row[uid_key]) in results:
                continue
            results[f'{row[uid_key]}'] = {
                'verb': {f'{j}': 0.0
                         for j in range(len(dataset.verb_classes))},
                'noun': {f'{j}': 0.0
                         for j in range(len(dataset.noun_classes))},
                # 100 dummy 'verb,noun' keys to match the top-100 format.
                'action': {f'0,{j}': 0.0
                           for j in range(100)},
            }
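    # Wrap the results with the dataset version and challenge type metadata.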
    output_dict = {
        'version': f'{dataset.version}',
        'challenge': dataset.challenge_type,
        'results': results
    }
    return output_dict
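
A minimal usage sketch, assuming two hypothetical run directories and equal
fusion weights (the paths, weights and '_val' suffix below are illustrative,
not from the repo):

import json

run_infos = ['runs/model_rgb', 'runs/model_flow']  # hypothetical run dirs
weights = [0.5, 0.5]  # equal late-fusion weights
outputs = get_struct_outputs_per_dataset(run_infos,
                                         weights,
                                         dataset_key_suffix='_val')
# `outputs` is JSON-serializable: version, challenge type, per-uid results.
with open('seen.json', 'w') as fout:
    json.dump(outputs, fout)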