in src/models/struxgpt_base.py [0:0]
def build_model_local(self, debug=False):
    """Populate ``self.model`` and ``self.tokenizer`` for local inference.

    Everything is loaded from ``self.model_name_or_path`` with
    ``trust_remote_code=True``. Which objects get built depends on
    ``self.use_vllm`` and ``debug``:

    * ``use_vllm`` falsy: load a slow (``use_fast=False``), left-padding
      tokenizer via ``AutoTokenizer`` and then a float16
      ``AutoModelForCausalLM`` with ``device_map="auto"``. ``debug`` has
      no effect on this path.
    * ``use_vllm`` truthy and ``debug`` truthy: set ``self.model`` to
      ``None`` and load only the ``AutoTokenizer`` tokenizer.
    * ``use_vllm`` truthy and ``debug`` falsy: build an ``LLM`` engine
      (presumably vLLM's, given the flag name -- confirm against the
      file's imports) with one tensor-parallel shard per device reported
      by ``torch.cuda.device_count()``, and reuse the engine's tokenizer.

    Args:
        debug: When truthy on the ``use_vllm`` path, skip building the
            engine and only load the tokenizer.
    """
    # Keyword arguments shared by both AutoTokenizer loads.
    tokenizer_options = {
        "use_fast": False,
        "padding_side": "left",
        "trust_remote_code": True,
    }

    if not self.use_vllm:
        # Plain path: tokenizer first, then the model weights.
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name_or_path, **tokenizer_options
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name_or_path,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        return

    if debug:
        # Debug path: no engine is built, only the tokenizer is available.
        self.model = None
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name_or_path, **tokenizer_options
        )
        return

    # Engine path: shard across every device torch reports.
    # TODO: consider capping memory with gpu_memory_utilization (e.g. 0.6).
    self.model = LLM(
        self.model_name_or_path,
        tensor_parallel_size=torch.cuda.device_count(),
        trust_remote_code=True,
    )
    self.tokenizer = self.model.get_tokenizer()