in domainbed_measures/experiment/regression.py [0:0]
def perform_subset_analysis(data,
                            feature_lambda=None,
                            subset_features=3,
                            single_test_env_only=False,
                            shared_weights_across_envs=False,
                            normalize=False,
                            canon=None,
                            fix_one_feat_to=None,
                            target_name='target_err'):
    """Regress ``target_name`` on every size-``subset_features`` subset of
    features, separately for each (dataset, environment) condition.

    Args:
        data: DataFrame with one row per run; must contain an
            'all_test_envs' column plus the feature/target columns
            consumed by ``get_data_condition``.
        feature_lambda: Optional callable forwarded to
            ``get_data_condition`` to select/transform feature columns.
        subset_features: Size of each feature subset regressed on the
            target.
        single_test_env_only: If True, keep only rows whose
            'all_test_envs' entry has length 2 (presumably encoding a
            single held-out test env plus one more entry — TODO confirm
            the encoding with the data producer).
        shared_weights_across_envs: If True, fit one regression per
            feature subset on the pooled (all-env) data and reuse a
            shallow copy of it for every condition instead of refitting.
        normalize: Forwarded to ``get_data_condition``.
        canon: Optional Series of per-feature multiplicative factors;
            each column named in ``canon.index`` is rescaled before
            analysis.
        fix_one_feat_to: If given, only score subsets that contain this
            feature; all other subsets are skipped.
        target_name: Name of the target column to regress on.

    Returns:
        SubsetAnalysis built from the per-condition regressions, with
        Spearman correlations recorded only when ``subset_features == 1``
        (None otherwise).
    """
    print("Performing analysis on subsets of {} features".format(
        subset_features))
    print("--------------------------------------------\n")
    # Work on a copy so the filtering/rescaling below never mutates the
    # caller's DataFrame (the original wrote canon factors in place).
    data = data.copy()
    if single_test_env_only:
        data = data[data['all_test_envs'].apply(len) == 2]
    if canon is not None:
        # Apply per-feature canonical sign/scale factors.
        for m in canon.index:
            data[m] = canon[m] * data[m]
    regressions = []
    datasets = []
    environments = []
    correlations = []
    common_regressors = {}
    if shared_weights_across_envs:
        # Fit one shared regressor per feature subset on the pooled data;
        # conditions below reuse these instead of refitting.
        print("Running regression across all envs.")
        all_features, all_targets = get_data_condition(
            data,
            None,
            None,
            feature_lambda=feature_lambda,
            target_name=target_name,
            normalize=normalize)
        for f, t in get_feature_subsets(all_features, all_targets,
                                        subset_features):
            common_regressors[tuple(f.columns)] = Regression(f, t)
    for dataset in get_datasets(data):
        for environment in get_dataset_environments(data, dataset):
            features, target = get_data_condition(
                data,
                dataset,
                environment,
                feature_lambda=feature_lambda,
                target_name=target_name,
                normalize=normalize)
            for f, t in get_feature_subsets(features, target,
                                            subset_features):
                # Optionally restrict to subsets containing one feature.
                if (fix_one_feat_to is not None
                        and fix_one_feat_to not in tuple(f.columns)):
                    continue
                if shared_weights_across_envs:
                    # Shallow copy so per-condition scores stored on the
                    # regressor do not clobber each other.
                    regressor = copy.copy(
                        common_regressors[tuple(f.columns)])
                else:
                    regressor = Regression(f, t)
                regressor.score_and_store(f, t)
                # Spearman rank correlation only makes sense for a single
                # feature column.
                if subset_features == 1:
                    correlations.append(scipy.stats.spearmanr(f, t))
                else:
                    correlations.append(None)
                regressions.append(regressor)
                datasets.append(dataset)
                environments.append(environment)
    return SubsetAnalysis(regressions, datasets, environments,
                          correlations)