# domainbed_measures/experiment/experiment.py
def load_generalization_gap(out_results: pd.DataFrame,
                            test_envs: List[int],
                            test_env_idx: int,
                            dirty_ood_split: str,
                            model_selection: str = "latest"
                            ) -> "tuple[float, float, float, float, float]":
    """Compute in-domain vs. out-of-domain generalization gaps.

    Args:
        out_results: DataFrame with one row per logged step whose accuracy
            columns are named like 'env1_out_acc' or 'env2_in_acc'.
        test_envs: Indices of all held-out (test) environments.
        test_env_idx: The single test environment whose clean split is used
            for the OOD accuracy.
        dirty_ood_split: The split ('in' or 'out') that was used for model
            selection; the *other* split of the test env is treated as the
            clean OOD evaluation split.
        model_selection: Only "latest" (use the last logged row) is supported.

    Returns:
        Tuple ``(ood_gap, wd_gap, in_domain_acc, ood_out_domain_acc,
        wd_out_domain_acc)``, all taken from the last row of ``out_results``.

    Raises:
        ValueError: If ``dirty_ood_split`` is not 'in'/'out', if
            ``model_selection`` is unsupported, or on an unexpected
            accuracy column name.
    """
    if dirty_ood_split not in ["in", "out"]:
        raise ValueError(
            f"Invalid value for dirty_ood_split: {dirty_ood_split}")
    if model_selection != "latest":
        raise ValueError(
            f"Unsupported model_selection: {model_selection}")
    # Columns with results are like 'env1_out_acc' or 'env2_in_acc' and so on.
    all_envs_acc = [
        x for x in out_results.columns if 'env' in x and 'acc' in x
    ]
    ood_out_domains = []  # clean split of the chosen test env
    wd_out_domains = []   # 'out' split of the training envs (within-domain)
    in_domains = []       # 'in' split of the training envs
    # The clean OOD split is whichever split was NOT used for selection.
    clean_ood_split = "out" if dirty_ood_split == "in" else "in"
    del dirty_ood_split  # guard against accidentally reusing the dirty split
    for e in all_envs_acc:
        if not ('in' in e or 'out' in e):
            raise ValueError("Unexpected env accuracy specifier %s" % (e))
        # 'env3_in_acc' -> 3; parse once instead of per-branch.
        env_idx = int(e.split('_')[0].strip('env'))
        if env_idx == test_env_idx and clean_ood_split in e:
            ood_out_domains.append(e)
        elif env_idx not in test_envs and 'out' in e:
            wd_out_domains.append(e)
        elif env_idx not in test_envs and 'in' in e:
            in_domains.append(e)
    # Row-wise mean accuracy over each column group.
    in_domain_perf = out_results[in_domains].mean(1)
    ood_out_domain_perf = out_results[ood_out_domains].mean(1)
    wd_out_domain_perf = out_results[wd_out_domains].mean(1)
    ood_gap = in_domain_perf - ood_out_domain_perf
    wd_gap = in_domain_perf - wd_out_domain_perf
    # "latest" model selection: report metrics from the final logged row.
    return (ood_gap.iloc[-1], wd_gap.iloc[-1], in_domain_perf.iloc[-1],
            ood_out_domain_perf.iloc[-1], wd_out_domain_perf.iloc[-1])