in src/deep_baselines/run.py [0:0]
def main(argv=None):
    """Parse command-line arguments for the deep-baselines runner.

    Builds the full argparse CLI (data/model/task selection, training
    hyper-parameters, loss configuration, and per-model options for
    CHEER / VirHunter / Virtifier / VirSeeker), parses it, and returns
    the resulting namespace.

    Args:
        argv: Optional list of argument strings. When None (the default,
            and the original behavior), arguments are read from sys.argv.
            Passing an explicit list makes the function unit-testable.

    Returns:
        argparse.Namespace with all parsed options; ``output_mode`` is
        set to mirror ``task_type`` for downstream code.
    """
    parser = argparse.ArgumentParser()
    # --- required data / task / model selection ---
    parser.add_argument("--data_dir", default=None, type=str, required=True, help="input dir, including *.csv/*.txt.")
    parser.add_argument("--separate_file", action="store_true", help="The id of each sample in the dataset is separate from its details")
    parser.add_argument("--tfrecords", action="store_true", help="whether the dataset is in tfrecords")
    parser.add_argument("--filename_pattern", default=None, type=str, help="the dataset filename pattern, such as {}_with_pdb_emb.csv including train_with_pdb_emb.csv, dev_with_pdb_emb.csv, and test_with_pdb_emb.csv in ${data_dir}")
    parser.add_argument("--dataset_name", default="rdrp_40_extend", type=str, required=True, help="dataset name")
    parser.add_argument("--dataset_type", default="protein", type=str, required=True, choices=["protein", "dna", "rna"], help="dataset type")
    parser.add_argument("--task_type", default="multi_label", type=str, required=True, choices=["multi_label", "multi_class", "binary_class"], help="task type")
    parser.add_argument("--model_type", default=None, type=str, required=True, choices=["CHEER-CatWCNN", "CHEER-WDCNN", "CHEER-WCNN", "VirHunter", "Virtifier", "VirSeeker"], help="model type.")
    parser.add_argument("--label_type", default="rdrp", type=str, required=True, help="label type.")
    parser.add_argument("--label_filepath", default=None, type=str, required=True, help="label filepath.")
    parser.add_argument("--seq_vocab_path", default=None, type=str, help="sequence token vocab filepath")
    parser.add_argument("--output_dir", default="./result/", type=str, required=True, help="output dir.")
    parser.add_argument("--log_dir", default="./logs/", type=str, required=True, help="log dir.")
    parser.add_argument("--tb_log_dir", default="./tb-logs/", type=str, required=True, help="tensorboard log dir.")
    # Other parameters
    parser.add_argument("--config_path", default=None, type=str, required=True, help="the model configuration filepath")
    parser.add_argument("--cache_dir", default="", type=str, help="cache dir")
    parser.add_argument("--do_train", action="store_true", help="whether to run training.")
    parser.add_argument("--do_eval", action="store_true", help="whether to run eval on the dev set.")
    parser.add_argument("--do_predict", action="store_true", help="whether to run predict on the test set.")
    parser.add_argument("--evaluate_during_training", action="store_true", help="whether to evaluation during training at each logging step.")
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int, help="batch size per GPU/CPU for evaluation.")
    parser.add_argument("--gradient_accumulation_steps", type=int, default=1, help="number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--learning_rate", default=1e-4, type=float, help="the initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float, help="weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="max gradient norm.")
    parser.add_argument("--num_train_epochs", default=50, type=int, help="total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int, help="if > 0: set total number of training steps to perform. Override num_train_epochs.")
    parser.add_argument("--warmup_steps", default=0, type=int, help="linear warmup over warmup_steps.")
    parser.add_argument("--logging_steps", type=int, default=50, help="log every X updates steps.")
    parser.add_argument("--save_steps", type=int, default=50, help="save checkpoint every X updates steps.")
    parser.add_argument("--eval_all_checkpoints", action="store_true", help="evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number")
    parser.add_argument("--no_cuda", action="store_true", help="avoid using CUDA when available")
    parser.add_argument("--overwrite_output_dir", action="store_true", help="overwrite the content of the output directory")
    parser.add_argument("--overwrite_cache", action="store_true", help="overwrite the cached training and evaluation sets")
    parser.add_argument("--seed", type=int, default=42, help="random seed for initialization")
    parser.add_argument("--fp16", action="store_true", help="whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit")
    parser.add_argument("--fp16_opt_level", type=str, default="O1", help="for fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." "See details at https://nvidia.github.io/apex/amp.html")
    parser.add_argument("--local_rank", type=int, default=-1, help="for distributed training: local_rank")
    # multi-label/ binary-class
    parser.add_argument("--sigmoid", action="store_true", help="classifier add sigmoid if task_type is binary-class or multi-label")
    # loss func
    parser.add_argument("--loss_type", type=str, default="bce", choices=["focal_loss", "bce", "multilabel_cce", "asl", "cce"], help="loss type")
    # which metric for model finalization selected
    parser.add_argument("--max_metric_type", type=str, default="f1", required=True, choices=["acc", "jaccard", "prec", "recall", "f1", "fmax", "pr_auc", "roc_auc"], help="which metric for model selected")
    parser.add_argument("--early_stopping_rounds", default=None, type=int, help="early stopping rounds.")
    # for focal Loss
    parser.add_argument("--focal_loss_alpha", type=float, default=0.7, help="focal loss alpha value")
    parser.add_argument("--focal_loss_gamma", type=float, default=2.0, help="focal loss gamma value")
    parser.add_argument("--focal_loss_reduce", action="store_true", help="mean for one sample(default sum)")
    # for asymmetric Loss
    parser.add_argument("--asl_gamma_neg", type=float, default=4.0, help="negative gamma for asl")
    parser.add_argument("--asl_gamma_pos", type=float, default=1.0, help="positive gamma for asl")
    # for all
    parser.add_argument("--seq_max_length", default=2048, type=int, help="the length of input sequence more than max length will be truncated, shorter will be padded.")
    parser.add_argument("--trunc_type", default="right", type=str, required=True, choices=["left", "right"], help="truncate type for whole input")
    parser.add_argument("--embedding_trainable", action="store_true", help="whether to train the embedding matrix")
    parser.add_argument("--embedding_dim", default=128, type=int, help="the dim of embedding vector")
    # for CHEER
    parser.add_argument("--channel_in", default=None, type=int, help="channel in")
    # for CHEER and VirHunter
    parser.add_argument("--kernel_nums", default=None, type=str, help="kernel_nums or kernel_num")
    parser.add_argument("--kernel_sizes", default=None, type=str, help="kernel_sizes or kernel_size")
    parser.add_argument("--fc_sizes", default=None, type=str, help="fc_sizes or fc_size")
    # for VirHunter
    parser.add_argument("--one_hot_encode", action="store_true", help="use one hot encode")
    # for VirSeeker
    parser.add_argument("--embedding", action="store_true", help="using embedding")
    # for Virtifier
    parser.add_argument("--embedding_init", action="store_true", help="pre-trained embedding")
    # typo fix: help said "re-trained"; this is the filepath for the
    # pre-trained embedding enabled by --embedding_init above
    parser.add_argument("--embedding_init_path", default=None, type=str, help="pre-trained embedding filepath")
    parser.add_argument("--bidirectional", action="store_true", help="bidirectional of LSTM")
    parser.add_argument("--num_layers", default=1, type=int, help="num layers of LSTM")
    parser.add_argument("--hidden_dim", default=128, type=int, help="the dim of hidden vector")
    parser.add_argument("--padding_idx", default=0, type=int, help="padding idx")
    parser.add_argument("--weight", default=None, type=float, help="loss weight for multi_class task")
    parser.add_argument("--pos_weight", default=None, type=float, help="positive weight")
    parser.add_argument("--dropout", default=None, type=float, help="dropout")
    parser.add_argument("--bias", action="store_true", help="bias")
    # typo fix: stray ")" removed from the two help strings below
    parser.add_argument("--save_all", action="store_true", help="save all checkpoints")
    parser.add_argument("--delete_old", action="store_true", help="delete old checkpoints")
    # argv=None preserves the original sys.argv behavior while allowing
    # an explicit list for testing/programmatic invocation
    args = parser.parse_args(argv)
    # downstream code keys off output_mode; it mirrors task_type
    args.output_mode = args.task_type
    return args