def render_many_sets()

in evals/elsuite/identifying_variables/renderers/corrset.py [0:0]


    def render_many_sets(self, correl_sets: List[Set[str]]):
        """
        Render a description of a causal graph with several correlation sets.

        Expects at least two correlation sets, the largest of which has at
        least two variables (this precondition is not checked here).
        The description looks like:
        ```
        In general, there were cases where some variables changed in tandem with each
        other, while others did not.
        {example of two variables that changed in tandem}
        {interleaved mentions of remaining variables, specifying which other already
        mentioned variables they changed in tandem with, if any}
        ```

        Args:
            correl_sets: groups of variable names; the variables inside one
                group are described as changing in tandem with each other.

        Returns:
            The description, one sentence per line, joined with newlines.
        """
        # Sort the sets by size, largest first (stable, so ties keep input order).
        # The members of each set are sorted as well: str hashes are randomised
        # per process, so raw set iteration order would make the output differ
        # between runs even when `self.rng` is seeded.
        ordered_sets = [
            sorted(correl_set)
            for correl_set in sorted(correl_sets, key=len, reverse=True)
        ]
        variables = [var for correl_set in ordered_sets for var in correl_set]

        correl_set_idx_to_already_mentioned_vars = [set() for _ in ordered_sets]
        var_to_correl_set_idx = {
            var: idx for idx, correl_set in enumerate(ordered_sets) for var in correl_set
        }

        # hard-code mention of the first two variables, from first (largest) set
        current_set_idx = 0
        first_var, second_var = variables[0], variables[1]
        lines = [
            templates.MANY_CORREL_SETS_MAIN,
            templates.CORREL_VARS_EXAMPLE.format(
                optional_transition="For example, ",
                # the first set is guaranteed to have at least two variables
                var_1=first_var,
                var_2=second_var,
            ),
        ]
        correl_set_idx_to_already_mentioned_vars[0].update([first_var, second_var])

        # go through remaining variables, randomly
        remaining_vars = variables[2:]
        self.rng.shuffle(remaining_vars)

        for var in remaining_vars:
            correl_set_idx = var_to_correl_set_idx[var]
            if correl_set_idx == current_set_idx:
                # same set as the previous sentence: link them with a transition
                transition_word = self.rng.choice(["Similarly", "Likewise"])
                transition_phrase = f"{transition_word}, "
            else:
                transition_phrase = ""
                current_set_idx = correl_set_idx

            mentioned_vars_from_set = correl_set_idx_to_already_mentioned_vars[
                correl_set_idx
            ]
            if not mentioned_vars_from_set:  # first time mentioning this set
                mention_string = templates.IND_VARS_EXAMPLE.format(
                    optional_transition=transition_phrase,
                    var_1=var,
                    var_2="previously mentioned variables",
                )
            else:  # variables from this set have been mentioned
                mention_string = templates.CORREL_VARS_EXAMPLE.format(
                    optional_transition=transition_phrase,
                    var_1=var,
                    # sorted so the listing does not depend on set iteration order
                    var_2=templates.list_to_nl_list(sorted(mentioned_vars_from_set)),
                )
            # Upper-case only the first character. `str.capitalize()` would also
            # lower-case the rest of the sentence, altering variable names that
            # contain upper-case letters.
            lines.append(mention_string[:1].upper() + mention_string[1:])
            # we have now mentioned this variable
            mentioned_vars_from_set.add(var)

        return "\n".join(lines)