in notebooks/utils.py [0:0]
# Module-level imports this function relies on (they may already exist
# elsewhere in utils.py).
import numpy as np
from tqdm import tqdm


def get_struct_outputs_per_dataset(run_infos,
weights,
dataset_key_suffix,
uid_key='uid',
eventual_fname='seen.json',
normalize_before_combine=None):
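    """Build structured per-uid outputs for one dataset.

    Late-fuses the given runs via `get_epic_marginalize_late_fuse`, then
    packs the fused verb/noun/action scores into the nested dict format
    used for the challenge submission JSON: per-class verb and noun
    scores, plus the top-100 actions keyed by 'verb,noun'.
    """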
_, combined, dataset = get_epic_marginalize_late_fuse(
run_infos,
weights,
dataset_key_suffix=dataset_key_suffix,
uid_key=uid_key,
eventual_fname=eventual_fname,
normalize_before_combine=normalize_before_combine)
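    # combined holds [verb_scores, noun_scores, action_scores], each a
    # mapping from uid to a per-class score vector.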
results = {}
    # Note: the following assertion may not hold, since a run_info that
    # points at an actual JSON file can contain more rows than the dataset.
    # assert len(combined[0]) == len(dataset)
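    # Invert the dataset's (verb, noun) -> action-id mapping.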
action_to_verb_noun = {
val: key
for key, val in dataset.verb_noun_to_action.items()
}
for uid in tqdm(combined[0].keys(), desc='Computing res'):
verb_res = {f'{j}': val for j, val in enumerate(combined[0][uid])}
noun_res = {f'{j}': val for j, val in enumerate(combined[1][uid])}
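        # np.argpartition with -100 gathers the indices of the 100 highest
        # action scores in linear time; then sort just those 100 indices
        # by descending score.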
        top_100_actions = sorted(
            np.argpartition(combined[2][uid], -100)[-100:],
            key=lambda x: -combined[2][uid][x])
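        # Key each action by its 'verb,noun' id pair.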
        action_res = {
            ','.join(map(str, action_to_verb_noun[j])): combined[2][uid][j]
            for j in top_100_actions
        }
results[f'{uid}'] = {
'verb': verb_res,
'noun': noun_res,
'action': action_res,
}
    # Add back any rows the dataset discarded, with all-zero placeholder
    # scores, so every uid is covered in the final output.
if dataset.discarded_df is not None:
for _, row in dataset.discarded_df.iterrows():
if str(row[uid_key]) in results:
continue
            results[f'{row[uid_key]}'] = {
                'verb': {f'{j}': 0.0
                         for j in range(len(dataset.verb_classes))},
                'noun': {f'{j}': 0.0
                         for j in range(len(dataset.noun_classes))},
                'action': {f'0,{j}': 0.0 for j in range(100)},
            }
output_dict = {
'version': f'{dataset.version}',
'challenge': dataset.challenge_type,
'results': results
}
return output_dict
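
A minimal usage sketch (the run_infos entries, weights, and the
dataset_key_suffix value here are hypothetical placeholders; their exact
structure is defined by get_epic_marginalize_late_fuse in this codebase):

import json

# Hypothetical inputs -- replace with real run identifiers and fusion
# weights for your experiments.
run_infos = [...]  # whatever get_epic_marginalize_late_fuse expects
weights = [1.0] * len(run_infos)

output_dict = get_struct_outputs_per_dataset(
    run_infos,
    weights,
    dataset_key_suffix='')  # assumed; depends on how datasets are keyed
# eventual_fname defaults to 'seen.json'; dump the dict under that name.
with open('seen.json', 'w') as fout:
    json.dump(output_dict, fout)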