# def evaluate()
#
# in scripts/scorer.py [0:0]


def evaluate(scores, labels, CLASSES):
    """
    Evaluates the predicted class scores w.r.t. the gold labels.

    Args:
        scores: Per-sample prediction scores. Must expose ``.tolist()``;
            presumably converts to a 2-D (n_samples, n_classes) array whose
            columns follow the order of ``CLASSES`` -- confirm with callers.
        labels: Per-sample collections of gold class names. Must expose
            ``.tolist()``.
        CLASSES: Sequence of unique class names. Its order fixes the column
            order of the binarized gold labels.

    Returns:
        A ``(f1, roc_auc)`` tuple: the micro-averaged F1 of the scores
        thresholded at 0.5, and the micro-averaged ROC AUC of the raw scores.
    """
    # Passing ``classes`` pins the indicator columns to the order of CLASSES
    # (the default would sort them), so ``classes_`` need not be patched
    # after fitting.
    mlb = MultiLabelBinarizer(classes=list(CLASSES))
    mlb.fit([CLASSES])

    gold_label = mlb.transform(labels.tolist())
    # Use a plain ndarray: np.matrix is deprecated and recent scikit-learn
    # releases reject matrix inputs with a TypeError.
    pred_score = np.asarray(scores.tolist())
    pred_label = (pred_score > 0.5).astype(int)
    roc_auc = roc_auc_score(gold_label, pred_score, average="micro", multi_class="ovr")
    f1 = f1_score(gold_label, pred_label, average="micro")
    return f1, roc_auc