in domainbed_measures/experiment/regression.py [0:0]
def report_top_subsets(analysis,
                       filter_dataset,
                       sort_by,
                       top_k=100,
                       canonicalize=False):
    """Print the top feature subsets and return statistics for all subsets.

    For every subset yielded by ``analysis.all_feature_name_subsets()`` the
    analysis is restricted to that subset and to ``filter_dataset`` via
    ``analysis.select(...)``, and five statistics are collected from the
    selection. The ``top_k`` entries chosen by ``dict_top_k`` (keyed on
    ``sort_by``, ``reverse=True``) are printed; the returned table contains
    every subset, not only the printed ones.

    Args:
        analysis: Object exposing ``all_feature_name_subsets()`` and
            ``select(feature_name=..., dataset_name=...)``. The selection
            must provide ``score()``, ``weight_variance()``,
            ``weight_sign_changes()``, ``corr_without_fit()`` and
            ``corr_score_with_fit()``.
        filter_dataset: Forwarded as ``dataset_name`` to ``analysis.select``
            and echoed in the printed header.
        sort_by: Name of the statistic handed to ``dict_top_k`` for ranking,
            e.g. one of the keys stored per subset (``'score'``, ...).
        top_k: Number of entries requested from ``dict_top_k`` for printing.
        canonicalize: When truthy, multiply both correlation columns by a
            per-row sign that is ``+1.0`` where ``correlation_no_fit > 0``
            and ``-1.0`` otherwise.

    Returns:
        A ``(results, canon)`` tuple. ``results`` is a ``pandas.DataFrame``
        with one row per subset, the collected statistics as columns, an
        extra ``'measure'`` column and the former index restored as
        column(s) by ``reset_index()``. ``canon`` is ``None`` unless
        ``canonicalize`` is truthy, in which case it is the ``+1.0/-1.0``
        sign series re-indexed by the ``'measure'`` column.
    """
    results = {}
    for subset in analysis.all_feature_name_subsets():
        analysis_subset = analysis.select(feature_name=subset,
                                          dataset_name=filter_dataset)
        results[subset] = {
            'score': analysis_subset.score(),
            'weight_variance': analysis_subset.weight_variance(),
            # NOTE(review): the key says "times_same_sign" but the value comes
            # from weight_sign_changes() -- confirm which meaning is intended.
            'times_same_sign': analysis_subset.weight_sign_changes(),
            'correlation_no_fit': analysis_subset.corr_without_fit(),
            'correlation_with_fit': analysis_subset.corr_score_with_fit(),
        }

    print("Best subsets on %s according to %s score:" %
          (filter_dataset, sort_by))

    # One column per subset here; transposed below to one row per subset.
    results_df = pd.DataFrame(results)

    top_subsets = dict_top_k(results, sort_by, top_k=top_k, reverse=True)
    for k, v in top_subsets.items():
        print(
            "{0}] R2:{1:.3f}| corr: {2:.3f} | corr/w/fit {3:.3f} times_same_sign: {4}"
            .format(
                k,
                v['score'],
                v['correlation_no_fit'],
                v['correlation_with_fit'],
                v['times_same_sign'],
            ))

    results = results_df.transpose()
    # 'measure' is the string form of the first element of each index entry;
    # presumably the first feature name of the subset -- TODO confirm.
    results['measure'] = [str(x[0]) for x in results.index]
    results = results.reset_index()

    canon = None
    if canonicalize:
        # +1.0 where the raw correlation is strictly positive, -1.0 elsewhere
        # (zero and NaN both compare False against `> 0`).
        canon = (results['correlation_no_fit'] > 0).astype(float)
        canon[canon == 0.0] = -1.0

        results['correlation_no_fit'] = canon * results['correlation_no_fit']
        results[
            'correlation_with_fit'] = canon * results['correlation_with_fit']

        # Re-index only after the multiplications above, which rely on canon
        # sharing the row index of `results`.
        canon.index = results['measure']

    return results, canon