def evaluate_model()

in model/disambiguate/train_model.py

Evaluates the model on all batches from the loader and returns accuracy as a percentage; when save_path is given, per-turn disambiguation predictions are grouped by dialog and written to a JSON file.


import collections
import json

import torch
from tqdm import tqdm as progressbar  # NOTE: assumed import; the repo may alias tqdm (or another progress library) as progressbar.


def evaluate_model(model, loader, batch_size, save_path=None, hidden_test=False):
    """Evaluates the model on the loader and returns accuracy (%).

    If save_path is given, per-turn predictions are grouped by dialog and
    written to a JSON file. If hidden_test is True, ground-truth labels are
    unavailable and the returned accuracy is 0.
    """
    num_matches = 0
    results = collections.defaultdict(list)
    with torch.no_grad():
        for batch in progressbar(loader.get_entire_batch(batch_size)):
            output = model(batch)
            predictions = torch.argmax(output, dim=1)
            if not hidden_test:
                # Count predictions that match the ground-truth labels.
                num_matches += (predictions == batch["gt_label"]).sum().item()

            # Collect per-turn predictions, grouped by dialog, if saving.
            if save_path:
                for ii in range(predictions.shape[0]):
                    new_instance = {
                        "turn_id": batch["turn_id"][ii],
                        "disambiguation_label": predictions[ii].cpu().item(),
                    }
                    results[batch["dialog_id"][ii]].append(new_instance)

    # Restructure results JSON and save.
    if save_path:
        results = [
            {"dialog_id": dialog_id, "predictions": turn_predictions}
            for dialog_id, turn_predictions in results.items()
        ]
        print(f"Saving: {save_path}")
        with open(save_path, "w") as file_id:
            json.dump(results, file_id)

    accuracy = num_matches / loader.num_instances * 100
    return accuracy
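
For context, a minimal usage sketch (not from the repo): it assumes model is a trained disambiguation classifier already on the right device, and that dev_loader exposes the get_entire_batch(batch_size) iterator and num_instances attribute used above; the loader name, batch size, and output path are hypothetical.

model.eval()  # NOTE: assumed caller responsibility; evaluate_model only disables gradient tracking.
accuracy = evaluate_model(
    model,
    dev_loader,
    batch_size=32,
    save_path="results/disambiguation_predictions.json",  # hypothetical path
)
print(f"devtest accuracy: {accuracy:.2f}%")

When save_path is set, the saved JSON is a list with one entry per dialog, of the form {"dialog_id": ..., "predictions": [{"turn_id": ..., "disambiguation_label": ...}, ...]}.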