in inference.py
import torch
from transformers import AutoModelForCausalLM, AutoProcessor


def load(repo_id):
    # Load the checkpoint in fp16, move it to the GPU, and switch to eval mode.
    model = (
        AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.float16, trust_remote_code=True)
        .to("cuda")
        .eval()
    )
    # The processor bundles the tokenizer/preprocessing logic shipped with the checkpoint.
    processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
    return model, processor
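

# Illustrative usage sketch only: the repo id and prompt below are placeholders,
# and a processor loaded with trust_remote_code may expect extra inputs
# (e.g. images or a chat template) beyond plain text.
if __name__ == "__main__":
    model, processor = load("org/model-name")  # placeholder repo id
    inputs = processor(text="Hello, world!", return_tensors="pt").to("cuda")
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=64)
    print(processor.batch_decode(output_ids, skip_special_tokens=True)[0])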