in evals/elsuite/identifying_variables/renderers/corrset.py [0:0]
def determine_sample_type(self, sample: Sample) -> Tuple[str, List[Set[str]]]:
    """
    Classify a sample by the shape of its observed correlation structure.

    Every tree found in the sample's causal graph yields one correlation
    set: the tree's variables minus the ones treated as unobserved (those
    whose "sparsity_rate" is strictly greater than SPARSITY_FOR_UNOBS).
    Trees that end up with no observed variable are discarded.

    Returns:
        str: The type of causal graph, ignoring unobserved variables.
            One of
            - "single_correl_set": exactly one correlation set remains.
            - "many_correl_sets": the number of remaining sets is not one
              and at least one of them has more than one variable.
            - "only_ind": the number of remaining sets is not one and none
              of them has more than one variable (this is also what is
              returned when no set remains at all).
        List[Set[str]]: The non-empty correlation sets, in the order the
            trees were returned by the graph utility.
    """
    trees = graph_utils.find_graph_trees(sample.causal_graph)

    metadata = sample.variable_metadata
    hidden_vars = {
        name
        for name in metadata
        if metadata[name]["extra"]["sparsity_rate"] > SPARSITY_FOR_UNOBS
    }

    # Correlations involving unobserved variables cannot be seen, so strip
    # those variables from every tree; a tree made up solely of unobserved
    # variables contributes nothing and is dropped.
    observed_sets = []
    for tree in trees:
        visible = set(tree) - hidden_vars
        if visible:
            observed_sets.append(visible)

    if len(observed_sets) == 1:
        return "single_correl_set", observed_sets

    # At least one set with more than one observed variable means there is
    # a genuine correlation group; otherwise every set is a lone variable.
    if any(len(group) > 1 for group in observed_sets):
        return "many_correl_sets", observed_sets
    return "only_ind", observed_sets