anli/src/nli/train_with_confidence.py [1019:1131]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
            )


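# Class-id to ANLI label mapping: e(ntailment), n(eutral), c(ontradiction); -1 marks a hidden/missing label.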
id2label = {
    0: "e",
    1: "n",
    2: "c",
    -1: "-",
}


def count_acc(gt_list, pred_list):
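    """Join gold and predicted items on "uid" and return (hit, total) counts."""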
    assert len(gt_list) == len(pred_list)
    gt_dict = list_dict_data_tool.list_to_dict(gt_list, "uid")
    pred_dict = list_dict_data_tool.list_to_dict(pred_list, "uid")
    total_count = 0
    hit = 0
    for key, value in pred_dict.items():
        if gt_dict[key]["label"] == value["predicted_label"]:
            hit += 1
        total_count += 1
    return hit, total_count


def evaluation_dataset(args, eval_dataloader, eval_list, model, r_dict, eval_name):
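    """Evaluate one dataset, print its accuracy, and record the results under r_dict[eval_name]."""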
    pred_output_list = eval_model(model, eval_dataloader, args.global_rank, args)
    predictions = pred_output_list
    hit, total = count_acc(eval_list, pred_output_list)

    print(debug_node_info(args), f"{eval_name} Acc:", hit, total, hit / total)

    r_dict[f"{eval_name}"] = {
        "acc": hit / total,
        "correct_count": hit,
        "total_count": total,
        "predictions": predictions,
    }


def eval_model(model, dev_dataloader, device_num, args):
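    """Run model over dev_dataloader without gradients and return a list of
    dicts, one per example, with its uid, raw logits, and predicted label string."""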
    model.eval()

    uid_list = []
    y_list = []
    pred_list = []
    logits_list = []

    with torch.no_grad():
        for i, batch in enumerate(dev_dataloader, 0):
            batch = move_to_device(batch, device_num)

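            # DistilBERT and BART do not accept token_type_ids, so they take a shorter call.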
            if args.model_class_name in ["distilbert", "bart-large"]:
                outputs = model(
                    batch["input_ids"],
                    attention_mask=batch["attention_mask"],
                    labels=batch["y"],
                )
            else:
                outputs = model(
                    batch["input_ids"],
                    attention_mask=batch["attention_mask"],
                    token_type_ids=batch["token_type_ids"],
                    labels=batch["y"],
                )

            loss, logits = outputs[:2]

            uid_list.extend(list(batch["uid"]))
            y_list.extend(batch["y"].tolist())  # gold labels; collected but not returned
            pred_list.extend(logits.argmax(dim=1).tolist())
            logits_list.extend(logits.tolist())

    assert len(pred_list) == len(logits_list)
    assert len(pred_list) == len(uid_list)

    result_items_list = []
    for i in range(len(uid_list)):
        r_item = dict()
        r_item["uid"] = uid_list[i]
        r_item["logits"] = logits_list[i]
        r_item["predicted_label"] = id2label[pred_list[i]]

        result_items_list.append(r_item)

    return result_items_list


def debug_node_info(args):
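    """Build a log prefix from the distributed ranks (global, local, node) on args."""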
    names = ["global_rank", "local_rank", "node_rank"]
    values = []

    for name in names:
        if hasattr(args, name):
            values.append(getattr(args, name))
        else:
            return "Pro:No node info "

    return (
        "Pro:"
        + "|".join([f"{name}:{value}" for name, value in zip(names, values)])
        + "||Print:"
    )


if __name__ == "__main__":
    args = get_args()
    d = datetime.datetime.today()
    main_exp_type = f"nli_{args.model_class_name}_{args.experiment_name}"
    # logdir = Path.cwd()
    exp_dir = (
        Path("/checkpoint/koustuvs")
        / "projects"
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



anli/src/nli/train_with_scramble.py [1141:1253]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                )


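# Class-id to ANLI label mapping: e(ntailment), n(eutral), c(ontradiction); -1 marks a hidden/missing label.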
id2label = {
    0: "e",
    1: "n",
    2: "c",
    -1: "-",
}


def count_acc(gt_list, pred_list):
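    """Join gold and predicted items on "uid" and return (hit, total) counts."""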
    assert len(gt_list) == len(pred_list)
    gt_dict = list_dict_data_tool.list_to_dict(gt_list, "uid")
    pred_dict = list_dict_data_tool.list_to_dict(pred_list, "uid")
    total_count = 0
    hit = 0
    for key, value in pred_dict.items():
        if gt_dict[key]["label"] == value["predicted_label"]:
            hit += 1
        total_count += 1
    return hit, total_count


def evaluation_dataset(args, eval_dataloader, eval_list, model, r_dict, eval_name):
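    """Evaluate one dataset, print its accuracy, and record the results under r_dict[eval_name]."""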
    pred_output_list = eval_model(model, eval_dataloader, args.global_rank, args)
    predictions = pred_output_list
    hit, total = count_acc(eval_list, pred_output_list)

    print(debug_node_info(args), f"{eval_name} Acc:", hit, total, hit / total)

    r_dict[f"{eval_name}"] = {
        "acc": hit / total,
        "correct_count": hit,
        "total_count": total,
        "predictions": predictions,
    }


def eval_model(model, dev_dataloader, device_num, args):
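    """Run model over dev_dataloader without gradients and return a list of
    dicts, one per example, with its uid, raw logits, and predicted label string."""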
    model.eval()

    uid_list = []
    y_list = []
    pred_list = []
    logits_list = []

    with torch.no_grad():
        for i, batch in enumerate(dev_dataloader, 0):
            batch = move_to_device(batch, device_num)

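            # DistilBERT and BART do not accept token_type_ids, so they take a shorter call.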
            if args.model_class_name in ["distilbert", "bart-large"]:
                outputs = model(
                    batch["input_ids"],
                    attention_mask=batch["attention_mask"],
                    labels=batch["y"],
                )
            else:
                outputs = model(
                    batch["input_ids"],
                    attention_mask=batch["attention_mask"],
                    token_type_ids=batch["token_type_ids"],
                    labels=batch["y"],
                )

            loss, logits = outputs[:2]

            uid_list.extend(list(batch["uid"]))
            y_list.extend(batch["y"].tolist())  # gold labels; collected but not returned
            pred_list.extend(logits.argmax(dim=1).tolist())
            logits_list.extend(logits.tolist())

    assert len(pred_list) == len(logits_list)
    assert len(pred_list) == len(uid_list)

    result_items_list = []
    for i in range(len(uid_list)):
        r_item = dict()
        r_item["uid"] = uid_list[i]
        r_item["logits"] = logits_list[i]
        r_item["predicted_label"] = id2label[pred_list[i]]

        result_items_list.append(r_item)

    return result_items_list


def debug_node_info(args):
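    """Build a log prefix from the distributed ranks (global, local, node) on args."""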
    names = ["global_rank", "local_rank", "node_rank"]
    values = []

    for name in names:
        if hasattr(args, name):
            values.append(getattr(args, name))
        else:
            return "Pro:No node info "

    return (
        "Pro:"
        + "|".join([f"{name}:{value}" for name, value in zip(names, values)])
        + "||Print:"
    )


if __name__ == "__main__":
    args = get_args()
    d = datetime.datetime.today()
    main_exp_type = f"nli_{args.model_class_name}_{args.experiment_name}"
    # logdir = Path.cwd()
    exp_dir = (
        Path("/checkpoint/koustuvs")
        / "projects"
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



