in blink/common/params.py [0:0]
def add_training_args(self, args=None):
    """
    Add model training args.

    Registers a "Model Training Arguments" group on this parser with flags
    controlling evaluation, batch size, the optimization schedule, and
    print/eval/save intervals.

    Args:
        args: unused; kept for signature compatibility with the other
            add_*_args helpers. -- TODO confirm against sibling methods.
    """

    def _str2bool(value):
        # argparse's type=bool is a trap: bool("False") is True because any
        # non-empty string is truthy. Parse boolean spellings explicitly;
        # raising ValueError makes argparse report "invalid value".
        if isinstance(value, bool):
            return value
        lowered = str(value).lower()
        if lowered in ("yes", "true", "t", "1"):
            return True
        if lowered in ("no", "false", "f", "0"):
            return False
        raise ValueError("Boolean value expected, got %r" % (value,))

    parser = self.add_argument_group("Model Training Arguments")
    parser.add_argument(
        "--evaluate", action="store_true", help="Whether to run evaluation."
    )
    parser.add_argument(
        "--output_eval_file",
        default=None,
        type=str,
        help="The txt file where the evaluation results will be written.",
    )
    parser.add_argument(
        "--train_batch_size", default=8, type=int,
        help="Total batch size for training."
    )
    parser.add_argument(
        "--max_grad_norm", default=1.0, type=float,
        help="Max norm for gradient clipping.",
    )
    parser.add_argument(
        "--learning_rate",
        default=3e-5,
        type=float,
        help="The initial learning rate for Adam.",
    )
    parser.add_argument(
        "--num_train_epochs",
        default=1,
        type=int,
        help="Number of training epochs.",
    )
    parser.add_argument(
        "--print_interval", type=int, default=10,
        help="Interval of loss printing",
    )
    parser.add_argument(
        "--eval_interval",
        type=int,
        default=100,
        help="Interval for evaluation during training",
    )
    parser.add_argument(
        "--save_interval", type=int, default=1,
        help="Interval for model saving"
    )
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        # "%%" because argparse %-formats help strings when rendering --help;
        # a bare "%" raises at help time.
        help="Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10%% of training.",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument(
        "--type_optimization",
        type=str,
        default="all_encoder_layers",
        help="Which type of layers to optimize in BERT",
    )
    parser.add_argument(
        "--shuffle", type=_str2bool, default=False,
        help="Whether to shuffle train data",
    )