def load()

in inference.py [0:0]


def load(repo_id, *, device="cuda", torch_dtype=None):
    """Load a causal-LM checkpoint and its processor, ready for inference.

    Args:
        repo_id: Identifier passed unchanged to both ``from_pretrained``
            calls (model and processor).
        device: Target passed to ``model.to(...)``. Defaults to ``"cuda"``,
            which is what this function always used before the parameter
            existed.
        torch_dtype: Value forwarded as ``torch_dtype`` when loading the
            model. ``None`` (the default) selects ``torch.float16``, the
            previously hard-coded value.

    Returns:
        A ``(model, processor)`` tuple. The model has been moved to
        ``device`` and had ``.eval()`` called on it.
    """
    if torch_dtype is None:
        # Resolved here rather than in the signature so that defining the
        # function does not require ``torch`` to be importable yet.
        torch_dtype = torch.float16

    # SECURITY: trust_remote_code=True allows code shipped inside the
    # repository to be executed locally. Only call this with repo_ids you
    # trust; do not pass user-supplied identifiers through unchecked.
    model = AutoModelForCausalLM.from_pretrained(
        repo_id,
        torch_dtype=torch_dtype,
        trust_remote_code=True,
    )
    model = model.to(device).eval()

    processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
    return model, processor