# lm_eval/tasks/wikitext/wikitext.yaml (20 lines of code) (raw)
# Task configuration for the `wikitext` task.
task: wikitext

# Dataset identifier and the configuration name selected within it.
dataset_path: EleutherAI/wikitext_document_level
dataset_name: wikitext-2-raw-v1

output_type: loglikelihood_rolling

# Names of the dataset splits used for each role.
training_split: train
validation_split: validation
test_split: test

# The text side is explicitly the empty string (not null); the target is
# produced by a function instead of a template.
doc_to_text: ""
# `!function` is an application-defined tag, so the consuming loader must
# register it (a plain safe loader will reject it).
# NOTE(review): presumably resolves to `<module>.<function>` in a sibling
# `preprocess_wikitext` module - confirm against the loader.
doc_to_target: !function preprocess_wikitext.wikitext_detokenizer
process_results: !function preprocess_wikitext.process_results

should_decontaminate: true
# Quoted so the leading `{` is read as a string, not a YAML flow mapping.
doc_to_decontamination_query: "{{page}}"

# Metrics reported for this task; each entry is a mapping with a `metric` key.
metric_list:
  - metric: word_perplexity
  - metric: byte_perplexity
  - metric: bits_per_byte

# `version` is nested under `metadata`; left as an unquoted number so its
# parsed type (float 2.0) is unchanged.
metadata:
  version: 2.0

# `trust_remote_code` is nested under `dataset_kwargs`.
# NOTE(review): presumably forwarded to the dataset loader, where it would
# permit executing code shipped with the dataset - confirm this is intended.
dataset_kwargs:
  trust_remote_code: true