leaderboard/irt/evaluate.py [69:98]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    predictions = LeaderboardPredictions.parse_file(
        DATA_ROOT / conf["squad"]["submission_predictions"][fold]
    )
    if evaluation == "heldout":
        splits = LeaderboardSplits.parse_file(conf["squad"]["leaderboard_splits"][fold])
        test_items = {(r.model_id, r.example_id) for r in splits.test}

        def use_item(model_id: str, example_id: str):
            return (model_id, example_id) in test_items

    elif evaluation == "full":

        def use_item(model_id: str, example_id: str):
            # pylint: disable=unused-argument
            return True

    else:
        raise ValueError(f"Invalid irt evaluation type: {evaluation}")

    pred_probs = []
    pred_labels = []
    labels = []
    for model_id, preds in track(predictions.scored_predictions.items()):
        em_scores = preds["exact_match"]
        for example_id, score in em_scores.items():
            if not use_item(model_id, example_id):
                continue
            score = int(score)
            example_stats = parameters.example_stats[example_id]
            model_stats = parameters.model_stats[model_id]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



leaderboard/irt/evaluate.py [146:175]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    predictions = LeaderboardPredictions.parse_file(
        DATA_ROOT / conf["squad"]["submission_predictions"][fold]
    )
    if evaluation == "heldout":
        splits = LeaderboardSplits.parse_file(conf["squad"]["leaderboard_splits"][fold])
        test_items = {(r.model_id, r.example_id) for r in splits.test}

        def use_item(model_id: str, example_id: str):
            return (model_id, example_id) in test_items

    elif evaluation == "full":

        def use_item(model_id: str, example_id: str):
            # pylint: disable=unused-argument
            return True

    else:
        raise ValueError(f"Invalid irt evaluation type: {evaluation}")

    pred_probs = []
    pred_labels = []
    labels = []
    for model_id, preds in track(predictions.scored_predictions.items()):
        em_scores = preds["exact_match"]
        for example_id, score in em_scores.items():
            if not use_item(model_id, example_id):
                continue
            score = int(score)
            example_stats = parameters.example_stats[example_id]
            model_stats = parameters.model_stats[model_id]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -




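The two ranges above ([69:98] and [146:175]) repeat the same prediction-loading and item-filtering prefix. Below is a minimal sketch (not code from the source file) of how that shared span could be factored into one helper; the helper names (_build_item_filter, iter_scored_items) and the explicit conf/evaluation/fold parameters are hypothetical, while LeaderboardPredictions, LeaderboardSplits, DATA_ROOT, and track are assumed to be the same objects the excerpts already use.

# Hypothetical refactor sketch for the duplicated span in evaluate.py.
# LeaderboardPredictions, LeaderboardSplits, and DATA_ROOT are assumed to be
# importable from the project's own modules; track is assumed to be
# rich.progress.track, as used in the excerpts above.
from typing import Callable, Dict, Iterator, Tuple

from rich.progress import track


def _build_item_filter(conf: Dict, evaluation: str, fold: str) -> Callable[[str, str], bool]:
    """Return a predicate deciding whether a (model_id, example_id) pair is scored."""
    if evaluation == "heldout":
        # Heldout evaluation only scores items in the leaderboard test split.
        splits = LeaderboardSplits.parse_file(conf["squad"]["leaderboard_splits"][fold])
        test_items = {(r.model_id, r.example_id) for r in splits.test}
        return lambda model_id, example_id: (model_id, example_id) in test_items
    if evaluation == "full":
        # Full evaluation scores every item.
        return lambda model_id, example_id: True
    raise ValueError(f"Invalid irt evaluation type: {evaluation}")


def iter_scored_items(conf: Dict, evaluation: str, fold: str) -> Iterator[Tuple[str, str, int]]:
    """Yield (model_id, example_id, exact-match score) for each selected item."""
    predictions = LeaderboardPredictions.parse_file(
        DATA_ROOT / conf["squad"]["submission_predictions"][fold]
    )
    use_item = _build_item_filter(conf, evaluation, fold)
    for model_id, preds in track(predictions.scored_predictions.items()):
        for example_id, score in preds["exact_match"].items():
            if use_item(model_id, example_id):
                yield model_id, example_id, int(score)

With a helper along these lines, both call sites could replace their shared prefix with a loop over iter_scored_items(conf, evaluation, fold), keeping only the per-item logic that follows the parameters.example_stats / parameters.model_stats lookups.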