in src/lighteval/models/nanotron/nanotron_model.py [0:0]
def homogeneize_ending_conditions(self, ending_condition: tuple | dict | list | str) -> tuple[list, int]:
"""Ending conditions are submitted in several possible formats.
By default in lighteval we pass them as tuples (stop sequence, max number of items).
In the harness they sometimes are passed as dicts {"until": .., "max_length": ...} or
as only ending conditions, either lists or strings.
Here, we convert all these formats to a tuple containing a list of ending conditions,
and a float for the max length allowed.
"""
max_tokens, stop_sequences = None, None
# Filling with input values or default
if isinstance(ending_condition, tuple) and len(ending_condition) == 2:
stop_sequence_arg, max_gen_tokens_arg = ending_condition
stop_sequences = as_list(stop_sequence_arg)
max_tokens = max_gen_tokens_arg
elif isinstance(ending_condition, dict): # Tasks in the harness sometimes pass a dict to rf.greedy_until
try:
stop_sequences = as_list(ending_condition["until"])
except KeyError:
stop_sequences = []
try:
max_tokens = ending_condition["max_length"]
except KeyError:
max_tokens = self._max_gen_toks
else: # only gave stop sequences as an ending condition
stop_sequences = as_list(ending_condition)
# Managing empty cases
if max_tokens is None:
max_tokens = self._max_gen_toks
if stop_sequences is None or (len(stop_sequences) == 1 and stop_sequences[0] is None): # or num_fewshot == 0:
stop_tokens = [self.eot_token]
else:
stop_tokens = list(stop_sequences) + [self.eot_token]
assert isinstance(max_tokens, int)
assert isinstance(stop_tokens, list)
return stop_tokens, max_tokens