in evaluation/model_utils.py [0:0]
def load_hf_lm_and_tokenizer(
    model_name_or_path,
    tokenizer_name_or_path=None,
    device_map="auto",
    load_in_8bit=False,
    load_in_half=True,
    gptq_model=False,
    use_fast_tokenizer=False,
    padding_side="left",
    use_safetensors=False,
):
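    """Load a Hugging Face causal LM and its tokenizer for evaluation.

    Supports three loading paths: a GPTQ-quantized checkpoint (via auto-gptq),
    8-bit quantization (via load_in_8bit / bitsandbytes), or a plain float16 load.
    If the tokenizer has no pad token, it falls back to the unk or eos token.
    Returns the (model, tokenizer) pair with the model in eval mode.
    """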
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    if not tokenizer_name_or_path:
        tokenizer_name_or_path = model_name_or_path
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_fast=use_fast_tokenizer, padding_side=padding_side, trust_remote_code=True)
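    # Note: padding_side="left" (the default here) matters for decoder-only models;
    # with right padding, batched generation would place pad tokens between the
    # prompt and the newly generated tokens.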

    # set pad token to unk or eos token if pad token is not set
    if tokenizer.pad_token is None:
        if tokenizer.unk_token:
            tokenizer.pad_token = tokenizer.unk_token
            tokenizer.pad_token_id = tokenizer.unk_token_id
        elif tokenizer.eos_token:
            tokenizer.pad_token = tokenizer.eos_token
            tokenizer.pad_token_id = tokenizer.eos_token_id
        else:
            raise ValueError(
                "You are using a new tokenizer without a pad token. "
                "This is not supported by this script."
            )
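
    # Three loading paths: GPTQ-quantized checkpoints (via auto-gptq), 8-bit
    # quantization (via load_in_8bit / bitsandbytes), or the default float16 load.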
    if gptq_model:
        from auto_gptq import AutoGPTQForCausalLM
        model_wrapper = AutoGPTQForCausalLM.from_quantized(
            model_name_or_path, device="cuda:0", use_triton=True
        )
        model = model_wrapper.model
    elif load_in_8bit:
        model = AutoModelForCausalLM.from_pretrained(
            model_name_or_path,
            device_map=device_map,
            load_in_8bit=True
        )
    else:
        # default: load in float16
        model = AutoModelForCausalLM.from_pretrained(
            model_name_or_path,
            torch_dtype=torch.float16,
            device_map=device_map,
            trust_remote_code=True,
            use_safetensors=use_safetensors,
        )
        if torch.cuda.is_available():
            model = model.cuda()
        if load_in_half:
            model = model.half()
    model.eval()
    return model, tokenizer
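
# A minimal usage sketch, not part of the original module: the checkpoint name
# below is only an illustrative placeholder, and running it assumes a CUDA GPU
# with torch and transformers installed.
if __name__ == "__main__":
    model, tokenizer = load_hf_lm_and_tokenizer(
        "meta-llama/Llama-2-7b-hf",  # placeholder checkpoint
        load_in_half=True,
        padding_side="left",
    )
    inputs = tokenizer(["Question: 1 + 1 = ?\nAnswer:"], return_tensors="pt", padding=True).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=16, do_sample=False)
    print(tokenizer.batch_decode(outputs, skip_special_tokens=True))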