in src/lighteval/logging/info_loggers.py [0:0]
def aggregate(self):
    """Compile per-task hashes, then fold them into one hash set for all tasks.

    Three steps, all performed by mutating attributes of ``self``:

    1. For every task in ``self.hashes``, build a ``self.CompiledHash()`` whose
       four fields (``hash_examples``, ``hash_full_prompts``,
       ``hash_input_tokens``, ``hash_cont_tokens``) are each the xxh64 hex
       digest of the task's per-sample hashes, sorted and concatenated. The
       result is stored in ``self.compiled_hashes[task_name]``.
    2. For every task in ``self.details``, copy the compiled hash (as a dict,
       via ``asdict``) into ``self.compiled_details[task_name].hashes``.
       Every task in ``self.details`` is expected to have an entry in
       ``self.compiled_hashes`` by this point.
    3. For every hash type, digest the per-task values in task-name order and
       store the result in ``self.compiled_details_over_all_tasks.hashes``.

    If no task has any compiled details, step 3 is skipped and the overall
    hashes are left untouched instead of failing on an empty collection.

    Returns:
        None. Results are written to ``self.compiled_hashes``,
        ``self.compiled_details`` and ``self.compiled_details_over_all_tasks``.
    """

    def _digest(parts):
        # Single place where string fragments are concatenated and hashed.
        return xxhash.xxh64("".join(parts)).hexdigest()

    # Step 1: one compiled hash per task. Per-sample hashes are sorted before
    # joining so the digest does not depend on sample order (reproducibility).
    for task_name, sample_hashes in self.hashes.items():
        compiled_hash = self.CompiledHash()
        compiled_hash.hash_examples = _digest(sorted(q.example for q in sample_hashes))
        compiled_hash.hash_full_prompts = _digest(sorted(q.full_prompt for q in sample_hashes))
        compiled_hash.hash_input_tokens = _digest(sorted(q.input_tokens for q in sample_hashes))
        compiled_hash.hash_cont_tokens = _digest(sorted(q.cont_tokens for q in sample_hashes))
        self.compiled_hashes[task_name] = compiled_hash

    # Step 2: attach each task's compiled hash to its compiled details.
    for task_name in self.details:
        self.compiled_details[task_name].hashes = asdict(self.compiled_hashes[task_name])

    # Nothing to fold when no task was registered; indexing the first entry
    # below would otherwise raise.
    if not self.compiled_details:
        return

    # Step 3: the hash types are taken from the first task's hashes; every task
    # is indexed with those same keys below.
    first_detail = next(iter(self.compiled_details.values()))
    hash_types: list[str] = list(first_detail.hashes)

    # Task names are unique dict keys, so sorting the keys gives the same order
    # as sorting the (name, detail) pairs; computed once for all hash types.
    ordered_details = [self.compiled_details[name] for name in sorted(self.compiled_details)]

    for hash_type in hash_types:
        self.compiled_details_over_all_tasks.hashes[hash_type] = _digest(
            detail.hashes[hash_type] for detail in ordered_details
        )