def homogeneize_ending_conditions()

in src/lighteval/models/nanotron/nanotron_model.py [0:0]


    def homogeneize_ending_conditions(self, ending_condition: tuple | dict | list | str) -> tuple[list, int]:
        """Ending conditions are submitted in several possible formats.
        By default in lighteval we pass them as tuples (stop sequence, max number of items).
        In the harness they sometimes are passed as dicts {"until": .., "max_length": ...} or
        as only ending conditions, either lists or strings.
        Here, we convert all these formats to a tuple containing a list of ending conditions,
        and a float for the max length allowed.
        """
        max_tokens, stop_sequences = None, None
        # Filling with input values or default
        if isinstance(ending_condition, tuple) and len(ending_condition) == 2:
            stop_sequence_arg, max_gen_tokens_arg = ending_condition
            stop_sequences = as_list(stop_sequence_arg)
            max_tokens = max_gen_tokens_arg
        elif isinstance(ending_condition, dict):  # Tasks in the harness sometimes pass a dict to rf.greedy_until
            try:
                stop_sequences = as_list(ending_condition["until"])
            except KeyError:
                stop_sequences = []
            try:
                max_tokens = ending_condition["max_length"]
            except KeyError:
                max_tokens = self._max_gen_toks
        else:  # only gave stop sequences  as an ending condition
            stop_sequences = as_list(ending_condition)

        # Managing empty cases
        if max_tokens is None:
            max_tokens = self._max_gen_toks
        if stop_sequences is None or (len(stop_sequences) == 1 and stop_sequences[0] is None):  # or num_fewshot == 0:
            stop_tokens = [self.eot_token]
        else:
            stop_tokens = list(stop_sequences) + [self.eot_token]

        assert isinstance(max_tokens, int)
        assert isinstance(stop_tokens, list)

        return stop_tokens, max_tokens