def compute_metrics()

in jamba1.5-retriever/scripts/train.py [0:0]
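
Evaluation hook for the retriever: it unpacks paired sentence embeddings and their ±1 similarity labels from `eval_pred`, thresholds cosine similarity at 0.5, and reports binary classification metrics.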


import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_recall_fscore_support


def compute_metrics(eval_pred, compute_result=False):
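    # NOTE: the (eval_pred, compute_result) signature matches what the Hugging
    # Face Trainer passes to compute_metrics when `batch_eval_metrics=True`
    # (compute_result is True on the final evaluation batch). This version
    # ignores the flag and scores whichever batch it receives.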

    print("Inside compute_metric")
    print(f"eval_pred: {eval_pred}")
    print(f"compute_result: {compute_result}")

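    # eval_pred carries the model's two embedding batches plus the ±1 labels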
    (embeddings1, embeddings2), labels = eval_pred

    # Detach and move tensors to CPU if they are on GPU (.cpu() is a no-op otherwise)
    embeddings1 = embeddings1.detach().cpu()
    embeddings2 = embeddings2.detach().cpu()

    # Labels become numpy arrays, as expected by sklearn
    labels = labels.detach().cpu().numpy()

    print("Calculating Cosine Similarity")
    # Calculate cosine similarity between pairs
    cosine_sim = F.cosine_similarity(embeddings1, embeddings2).numpy()
    print("After Cosine Similarity")
    print(f"cosine_sim: {cosine_sim}")

    print("Calculating Prediction")
    # Convert cosine similarity to binary predictions (1 for similar, -1 for dissimilar)
    predictions = [1 if sim >= 0.5 else -1 for sim in cosine_sim]
    print("After Prediction")
    print(f"predictions: {predictions}")

    # Calculate accuracy, precision, recall and F1
    print("Calculating Accuracy")
    accuracy = accuracy_score(labels, predictions)
    print("After Accuracy")
    print(f"predictions: {accuracy}")
    print("Calculating Precision, Recall, F1, Support")
    precision, recall, f1, support = precision_recall_fscore_support(labels, predictions, average='binary')
    print("After Precision, Recall, F1, Support")
    print(f"precision: {precision}, recall: {recall}, f1: {f1}")

    metrics = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

    print(f"Calculated Metrics : {metrics}")

    return metrics
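
A minimal smoke test, sketching the input contract; the tensor shapes and ±1 labels below are illustrative assumptions, not values from the training script:

import torch

emb1 = torch.randn(8, 16)   # batch of 8 query embeddings (dim 16)
emb2 = torch.randn(8, 16)   # batch of 8 candidate embeddings
labels = torch.tensor([1, -1, 1, 1, -1, -1, 1, -1])

print(compute_metrics(((emb1, emb2), labels)))

In training, this function would be passed as the Trainer's compute_metrics callback; the compute_result parameter suggests the script uses the Trainer's batch_eval_metrics mode, in which the callback runs once per evaluation batch.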