anli/src/nli/train_with_scramble.py [163:415]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
}

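# Map NLI labels to class indices: e = entailment, n = neutral, c = contradiction;
# "h" (hidden/unknown label) maps to -1.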
nli_label2index = {
    "e": 0,
    "n": 1,
    "c": 2,
    "h": -1,
}


def set_seed(seed):
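    """Seed Python's, NumPy's, and PyTorch's random number generators for reproducibility."""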
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


class NLIDataset(Dataset):
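    """Dataset wrapper that applies the given transform to each raw example on access."""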
    def __init__(self, data_list, transform) -> None:
        super().__init__()
        self.d_list = data_list
        self.len = len(self.d_list)
        self.transform = transform

    def __getitem__(self, index: int):
        return self.transform(self.d_list[index])

    # A batching schema should be written for each of the input elements (see BaseBatchBuilder).

    def __len__(self) -> int:
        return self.len


class NLITransform(object):
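    """Tokenize an NLI example as a (premise, hypothesis) sequence pair.

    Empty premises or hypotheses are replaced with the placeholder text "empty".
    """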
    def __init__(self, model_name, tokenizer, max_length=None):
        self.model_name = model_name
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __call__(self, sample):
        processed_sample = dict()
        processed_sample["uid"] = sample["uid"]
        processed_sample["gold_label"] = sample["label"]
        processed_sample["y"] = nli_label2index[sample["label"]]

        # premise: str = sample['premise']
        premise: str = sample["context"] if "context" in sample else sample["premise"]
        hypothesis: str = sample["hypothesis"]

        if premise.strip() == "":
            premise = "empty"

        if hypothesis.strip() == "":
            hypothesis = "empty"

        tokenized_input_seq_pair = self.tokenizer.encode_plus(
            premise,
            hypothesis,
            max_length=self.max_length,
            return_token_type_ids=True,
            truncation=True,
        )

        processed_sample.update(tokenized_input_seq_pair)

        return processed_sample


class FlippedNLITransform(object):
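    """Same as NLITransform, but with the pair order flipped: the hypothesis is
    encoded as the first segment and the premise as the second.
    """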
    def __init__(self, model_name, tokenizer, max_length=None):
        self.model_name = model_name
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __call__(self, sample):
        processed_sample = dict()
        processed_sample["uid"] = sample["uid"]
        processed_sample["gold_label"] = sample["label"]
        processed_sample["y"] = nli_label2index[sample["label"]]

        # premise: str = sample['premise']
        premise: str = sample["context"] if "context" in sample else sample["premise"]
        hypothesis: str = sample["hypothesis"]

        if premise.strip() == "":
            premise = "empty"

        if hypothesis.strip() == "":
            hypothesis = "empty"

        tokenized_input_seq_pair = self.tokenizer.encode_plus(
            hypothesis,
            premise,
            max_length=self.max_length,
            return_token_type_ids=True,
            truncation=True,
        )

        processed_sample.update(tokenized_input_seq_pair)

        return processed_sample


def build_eval_dataset_loader_and_sampler(
    d_list, data_transformer, batching_schema, batch_size_per_gpu_eval
):
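    """Build a sequential (non-shuffled) evaluation DataLoader over d_list."""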
    d_dataset = NLIDataset(d_list, data_transformer)
    d_sampler = SequentialSampler(d_dataset)
    d_dataloader = DataLoader(
        dataset=d_dataset,
        batch_size=batch_size_per_gpu_eval,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        sampler=d_sampler,
        collate_fn=BaseBatchBuilder(batching_schema),
    )
    return d_dataset, d_sampler, d_dataloader


def sample_data_list(d_list, ratio):
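    """Resample d_list according to ratio.

    Ratios in (0, 1] return the list unchanged; ratios above 1 replicate the list
    ceil(ratio) times and, for non-integer ratios, shuffle the copies and truncate
    to int(ratio * len(d_list)) examples.
    """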
    if ratio <= 0:
        raise ValueError(
            "Invalid training weight ratio. Please change --train_weights."
        )
    upper_int = int(math.ceil(ratio))
    if upper_int == 1:
        return d_list  # if ratio is 1 then we just return the data list
    else:
        sampled_d_list = []
        for _ in range(upper_int):
            sampled_d_list.extend(copy.deepcopy(d_list))
        if np.isclose(ratio, upper_int):
            return sampled_d_list
        else:
            sampled_length = int(ratio * len(d_list))
            random.shuffle(sampled_d_list)
            return sampled_d_list[:sampled_length]


def get_args():
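    """Define the command-line arguments for the training script."""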
    parser = argparse.ArgumentParser()
    parser.add_argument("--cpu", action="store_true", help="If set, we only use CPU.")
    parser.add_argument(
        "--single_gpu", action="store_true", help="If set, we only use single GPU."
    )
    parser.add_argument("--fp16", action="store_true", help="If set, we will use fp16.")

    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )

    # environment arguments
    parser.add_argument(
        "-s", "--seed", default=1, type=int, metavar="N", help="manual random seed"
    )
    parser.add_argument(
        "-n", "--num_nodes", default=1, type=int, metavar="N", help="number of nodes"
    )
    parser.add_argument(
        "-g", "--gpus_per_node", default=1, type=int, help="number of gpus per node"
    )
    parser.add_argument(
        "-nr", "--node_rank", default=0, type=int, help="ranking within the nodes"
    )

    # experiments specific arguments
    parser.add_argument(
        "--debug_mode",
        action="store_true",
        dest="debug_mode",
        help="weather this is debug mode or normal",
    )

    parser.add_argument(
        "--model_class_name", type=str, help="Set the model class of the experiment.",
    )

    parser.add_argument(
        "--experiment_name",
        type=str,
        help="Set the name of the experiment. [model_name]/[data]/[task]/[other]",
    )

    parser.add_argument(
        "--save_prediction",
        action="store_true",
        dest="save_prediction",
        help="Do we want to save prediction",
    )

    parser.add_argument(
        "--epochs",
        default=2,
        type=int,
        metavar="N",
        help="number of total epochs to run",
    )
    parser.add_argument(
        "--per_gpu_train_batch_size",
        default=16,
        type=int,
        help="Batch size per GPU/CPU for training.",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--per_gpu_eval_batch_size",
        default=64,
        type=int,
        help="Batch size per GPU/CPU for evaluation.",
    )

    parser.add_argument(
        "--max_length", default=160, type=int, help="Max length of the sequences."
    )

    parser.add_argument(
        "--warmup_steps", default=-1, type=int, help="Linear warmup over warmup_steps."
    )
    parser.add_argument(
        "--max_grad_norm", default=1.0, type=float, help="Max gradient norm."
    )
    parser.add_argument(
        "--learning_rate",
        default=1e-5,
        type=float,
        help="The initial learning rate for Adam.",
    )
    parser.add_argument(
        "--weight_decay", default=0.0, type=float, help="Weight decay if we apply some."
    )
    parser.add_argument(
        "--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer."
    )

    parser.add_argument(
        "--eval_frequency",
        default=1000,
        type=int,
        help="set the evaluation frequency, evaluate every X global step.",
    )

    parser.add_argument(
        "--train_data", type=str, help="The training data used in the experiments."
    )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



anli/src/nli/training.py [173:425]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
}

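# Map NLI labels to class indices: e = entailment, n = neutral, c = contradiction;
# "h" (hidden/unknown label) maps to -1.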
nli_label2index = {
    "e": 0,
    "n": 1,
    "c": 2,
    "h": -1,
}


def set_seed(seed):
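    """Seed Python's, NumPy's, and PyTorch's random number generators for reproducibility."""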
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


class NLIDataset(Dataset):
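    """Dataset wrapper that applies the given transform to each raw example on access."""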
    def __init__(self, data_list, transform) -> None:
        super().__init__()
        self.d_list = data_list
        self.len = len(self.d_list)
        self.transform = transform

    def __getitem__(self, index: int):
        return self.transform(self.d_list[index])

    # A batching schema should be written for each of the input elements (see BaseBatchBuilder).

    def __len__(self) -> int:
        return self.len


class NLITransform(object):
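    """Tokenize an NLI example as a (premise, hypothesis) sequence pair.

    Empty premises or hypotheses are replaced with the placeholder text "empty".
    """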
    def __init__(self, model_name, tokenizer, max_length=None):
        self.model_name = model_name
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __call__(self, sample):
        processed_sample = dict()
        processed_sample["uid"] = sample["uid"]
        processed_sample["gold_label"] = sample["label"]
        processed_sample["y"] = nli_label2index[sample["label"]]

        # premise: str = sample['premise']
        premise: str = sample["context"] if "context" in sample else sample["premise"]
        hypothesis: str = sample["hypothesis"]

        if premise.strip() == "":
            premise = "empty"

        if hypothesis.strip() == "":
            hypothesis = "empty"

        tokenized_input_seq_pair = self.tokenizer.encode_plus(
            premise,
            hypothesis,
            max_length=self.max_length,
            return_token_type_ids=True,
            truncation=True,
        )

        processed_sample.update(tokenized_input_seq_pair)

        return processed_sample


class FlippedNLITransform(object):
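    """Same as NLITransform, but with the pair order flipped: the hypothesis is
    encoded as the first segment and the premise as the second.
    """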
    def __init__(self, model_name, tokenizer, max_length=None):
        self.model_name = model_name
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __call__(self, sample):
        processed_sample = dict()
        processed_sample["uid"] = sample["uid"]
        processed_sample["gold_label"] = sample["label"]
        processed_sample["y"] = nli_label2index[sample["label"]]

        # premise: str = sample['premise']
        premise: str = sample["context"] if "context" in sample else sample["premise"]
        hypothesis: str = sample["hypothesis"]

        if premise.strip() == "":
            premise = "empty"

        if hypothesis.strip() == "":
            hypothesis = "empty"

        tokenized_input_seq_pair = self.tokenizer.encode_plus(
            hypothesis,
            premise,
            max_length=self.max_length,
            return_token_type_ids=True,
            truncation=True,
        )

        processed_sample.update(tokenized_input_seq_pair)

        return processed_sample


def build_eval_dataset_loader_and_sampler(
    d_list, data_transformer, batching_schema, batch_size_per_gpu_eval
):
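    """Build a sequential (non-shuffled) evaluation DataLoader over d_list."""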
    d_dataset = NLIDataset(d_list, data_transformer)
    d_sampler = SequentialSampler(d_dataset)
    d_dataloader = DataLoader(
        dataset=d_dataset,
        batch_size=batch_size_per_gpu_eval,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        sampler=d_sampler,
        collate_fn=BaseBatchBuilder(batching_schema),
    )
    return d_dataset, d_sampler, d_dataloader


def sample_data_list(d_list, ratio):
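    """Resample d_list according to ratio.

    Ratios in (0, 1] return the list unchanged; ratios above 1 replicate the list
    ceil(ratio) times and, for non-integer ratios, shuffle the copies and truncate
    to int(ratio * len(d_list)) examples.
    """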
    if ratio <= 0:
        raise ValueError(
            "Invalid training weight ratio. Please change --train_weights."
        )
    upper_int = int(math.ceil(ratio))
    if upper_int == 1:
        return d_list  # if ratio is 1 then we just return the data list
    else:
        sampled_d_list = []
        for _ in range(upper_int):
            sampled_d_list.extend(copy.deepcopy(d_list))
        if np.isclose(ratio, upper_int):
            return sampled_d_list
        else:
            sampled_length = int(ratio * len(d_list))
            random.shuffle(sampled_d_list)
            return sampled_d_list[:sampled_length]


def get_args():
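    """Define the command-line arguments for the training script."""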
    parser = argparse.ArgumentParser()
    parser.add_argument("--cpu", action="store_true", help="If set, we only use CPU.")
    parser.add_argument(
        "--single_gpu", action="store_true", help="If set, we only use single GPU."
    )
    parser.add_argument("--fp16", action="store_true", help="If set, we will use fp16.")

    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )

    # environment arguments
    parser.add_argument(
        "-s", "--seed", default=1, type=int, metavar="N", help="manual random seed"
    )
    parser.add_argument(
        "-n", "--num_nodes", default=1, type=int, metavar="N", help="number of nodes"
    )
    parser.add_argument(
        "-g", "--gpus_per_node", default=1, type=int, help="number of gpus per node"
    )
    parser.add_argument(
        "-nr", "--node_rank", default=0, type=int, help="ranking within the nodes"
    )

    # experiments specific arguments
    parser.add_argument(
        "--debug_mode",
        action="store_true",
        dest="debug_mode",
        help="weather this is debug mode or normal",
    )

    parser.add_argument(
        "--model_class_name", type=str, help="Set the model class of the experiment.",
    )

    parser.add_argument(
        "--experiment_name",
        type=str,
        help="Set the name of the experiment. [model_name]/[data]/[task]/[other]",
    )

    parser.add_argument(
        "--save_prediction",
        action="store_true",
        dest="save_prediction",
        help="Do we want to save prediction",
    )

    parser.add_argument(
        "--epochs",
        default=2,
        type=int,
        metavar="N",
        help="number of total epochs to run",
    )
    parser.add_argument(
        "--per_gpu_train_batch_size",
        default=16,
        type=int,
        help="Batch size per GPU/CPU for training.",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--per_gpu_eval_batch_size",
        default=64,
        type=int,
        help="Batch size per GPU/CPU for evaluation.",
    )

    parser.add_argument(
        "--max_length", default=160, type=int, help="Max length of the sequences."
    )

    parser.add_argument(
        "--warmup_steps", default=-1, type=int, help="Linear warmup over warmup_steps."
    )
    parser.add_argument(
        "--max_grad_norm", default=1.0, type=float, help="Max gradient norm."
    )
    parser.add_argument(
        "--learning_rate",
        default=1e-5,
        type=float,
        help="The initial learning rate for Adam.",
    )
    parser.add_argument(
        "--weight_decay", default=0.0, type=float, help="Weight decay if we apply some."
    )
    parser.add_argument(
        "--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer."
    )

    parser.add_argument(
        "--eval_frequency",
        default=1000,
        type=int,
        help="set the evaluation frequency, evaluate every X global step.",
    )

    parser.add_argument(
        "--train_data", type=str, help="The training data used in the experiments."
    )
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



