# run_inference() — extracted from distilbert-base-uncased.py

def run_inference(model_name, sample_text, device='cuda'):
    """Run single-pass sequence-classification inference on a text sample.

    Parameters
    ----------
    model_name : str
        Hugging Face model identifier or local checkpoint path, passed to
        ``from_pretrained``.
    sample_text : str or list[str]
        Text to classify; the tokenizer accepts a single string or a batch.
    device : str, optional
        Torch device to place the model and inputs on. Defaults to
        ``'cuda'`` (the original hard-coded behavior); pass ``'cpu'`` on
        machines without a GPU.

    Returns
    -------
    numpy.ndarray
        Predicted class index per input (argmax over the logits).
    """
    # Load model and tokenizer from the hub / local cache.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    model.to(device)

    # Tokenize: pad/truncate every sample to a fixed length of 128 tokens
    # so the batch tensor shape is deterministic.
    inputs = tokenizer(sample_text, return_tensors='pt', truncation=True,
                       padding='max_length', max_length=128)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # eval() disables dropout/batch-norm updates; no_grad() skips autograd
    # bookkeeping since we never backpropagate here.
    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)

    # Move off the accelerator before converting to a NumPy array.
    return predictions.cpu().numpy()