qwiklabs/llama2-finetuning-slurm/files/fine-tune.py

# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from pathlib import Path from datasets import load_dataset, concatenate_datasets from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training import torch # Local path to model. Needs to be downloaded from Hugging Face model_path = "./llama2-7b-hf" finetuned_model_path = "./llama2-7b-american-stories" tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True) model = AutoModelForCausalLM.from_pretrained( model_path, torch_dtype=torch.float16, device_map="auto", trust_remote_code=True) # Get Dataset dataset = load_dataset("dell-research-harvard/AmericanStories", "subset_years", trust_remote_code=True, year_list=["1809", "1810", "1811", "1812", "1813", "1814", "1815"] ) dataset = concatenate_datasets(dataset.values()) if tokenizer.pad_token is None: tokenizer.add_special_tokens({'pad_token': '[PAD]'}) model.resize_token_embeddings(len(tokenizer)) data = dataset.map(lambda x: tokenizer( x["article"], padding='max_length', truncation=True)) lora_config = LoraConfig( r=16, lora_alpha=32, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM" ) model = prepare_model_for_kbit_training(model) # add LoRA adaptor model = get_peft_model(model, lora_config) model.print_trainable_parameters() training_args = TrainingArguments( per_device_train_batch_size=1, gradient_accumulation_steps=4, warmup_steps=2, num_train_epochs=1, learning_rate=2e-4, fp16=True, logging_steps=1, output_dir=finetuned_model_path, optim="paged_adamw_32bit", ) trainer = Trainer( model=model, train_dataset=data, args=training_args, data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False), ) model.config.use_cache = False # silence the warnings. Please re-enable for inference! trainer.train() # Merge the fine tuned layer with the base model and save it # you can remove the line below if you only want to store the LoRA layer model = model.merge_and_unload() model.save_pretrained(finetuned_model_path) tokenizer.save_pretrained(finetuned_model_path) # Beginning of story in the dataset prompt = """ In the late action between Generals Brown and Riall, it appears our men fought with a courage and perseverance, that would """ input_ids = tokenizer(prompt, return_tensors="pt").input_ids gen_tokens = model.generate( input_ids, do_sample=True, temperature=0.8, max_length=100, ) print("Output from test prompt\n") print(tokenizer.batch_decode(gen_tokens)[0])

qwiklabs/llama2-finetuning-slurm/files/fine-tune.py (62 lines of code) (raw):