in train.py [0:0]
def add_args(parser):
    """Register all command-line arguments used by training.

    Flags are grouped into: training, dataset, distributed-training,
    transformer, EBM, generic-model, and negative-sampling hyperparameters.

    Args:
        parser: an ``argparse.ArgumentParser`` (or compatible) instance.

    Returns:
        The same parser, to allow chained use.
    """
    #############################
    ##### Training hyperparameters
    #############################
    parser.add_argument(
        "--logdir",
        default="cachedir",
        type=str,
        help="location where log of experiments will be stored",
    )
    parser.add_argument("--exp", default="default", type=str, help="name of experiment")
    parser.add_argument("--resume-iter", default=0, type=int, help="resume value")
    parser.add_argument(
        "--n-epochs", default=10000, type=int, help="number of epochs of training"
    )
    parser.add_argument(
        "--batch-size", default=128, type=int, help="batch size to use during training"
    )
    parser.add_argument("--log-interval", default=50, type=int, help="interval to log results")
    # FIX: help text previously duplicated --log-interval's description.
    parser.add_argument(
        "--save-interval", default=1000, type=int, help="interval to save model checkpoints"
    )
    parser.add_argument(
        "--no-train",
        default=False,
        action="store_true",
        help="Instead of training, only test the model",
    )
    parser.add_argument(
        "--no-cuda", default=False, action="store_true", help="do not use GPUs for computations"
    )
    # FIX: option descriptions were concatenated without separators in --help output.
    parser.add_argument(
        "--model",
        default="transformer",
        type=str,
        help="model to use during training. options: transformer, fc, s2s (set 2 set), graph. "
        "transformer: transformer model; "
        "fc: MLP baseline used in paper; "
        "s2s: Set2Set baseline used in paper; "
        "graph: GNN baseline used in paper",
    )
    #############################
    ##### Dataset hyperparameters
    #############################
    parser.add_argument(
        "--data-workers", default=4, type=int, help="number of dataloader workers"
    )
    parser.add_argument(
        "--multisample",
        default=16,
        type=int,
        help="number of different rotamers to select from an individual protein",
    )
    #############################
    ##### Distributed Training
    #############################
    parser.add_argument("--nodes", default=1, type=int, help="number of nodes for training")
    parser.add_argument("--gpus", default=1, type=int, help="number of gpus per node")
    parser.add_argument("--node-rank", default=0, type=int, help="rank of node")
    parser.add_argument(
        "--master-addr", default="8.8.8.8", type=str, help="address of communicating server"
    )
    parser.add_argument("--port", default=10002, type=int, help="port for communicating server")
    parser.add_argument(
        "--slurm", default=False, action="store_true", help="run experiments on SLURM?"
    )
    #############################
    ##### Transformer hyperparameters
    #############################
    parser.add_argument(
        "--encoder-layers", default=6, type=int, help="number of transformer layers"
    )
    parser.add_argument(
        "--dropout", default=0.0, type=float, help="dropout of attention weights in transformer"
    )
    parser.add_argument(
        "--relu-dropout", default=0.0, type=float, help="chance of dropping out a relu unit"
    )
    parser.add_argument(
        "--no-encoder-normalize-before",
        action="store_true",
        default=False,
        help="do not normalize outputs before the encoder (transformer only)",
    )
    parser.add_argument(
        "--encoder-attention-heads",
        default=8,
        type=int,
        help="number of attention heads (transformer only)",
    )
    parser.add_argument(
        "--attention-dropout", default=0.0, type=float, help="dropout probability for attention"
    )
    parser.add_argument(
        "--encoder-ffn-embed-dim",
        default=1024,
        type=int,
        help="hidden dimension to use in transformer",
    )
    parser.add_argument(
        "--encoder-embed-dim", default=256, type=int, help="original embed dim of element"
    )
    # FIX: type was str, but the default (64) and the help text (a count of
    # atoms) imply an integer; a CLI-supplied value would have stayed a string.
    parser.add_argument(
        "--max-size",
        default=64,
        type=int,
        help="number of nearby atoms to attend when predicting energy of rotamer",
    )
    #############################
    ##### EBM hyperparameters
    #############################
    parser.add_argument(
        "--neg-sample",
        default=1,
        type=int,
        help="number of negative rotamer samples per real data sample (1-1 ratio)",
    )
    parser.add_argument(
        "--l2-norm", default=False, action="store_true", help="norm the energies"
    )
    parser.add_argument(
        "--no-augment",
        default=False,
        action="store_true",
        help="do not augment training data with so3 rotations",
    )
    #############################
    ##### generic model params
    #############################
    parser.add_argument(
        "--start-lr", default=1e-10, type=float, help="initial warmup learning rate"
    )
    parser.add_argument("--end-lr", default=2e-4, type=float, help="end lr of training")
    parser.add_argument(
        "--lr-schedule",
        default="constant",
        type=str,
        help="schedule to anneal the learning rate of transformer."
        " options: constant, inverse_sqrt",
    )
    parser.add_argument(
        "--warmup-itr", default=500, type=int, help="number of warm up iterations"
    )
    parser.add_argument(
        "--single",
        default=False,
        action="store_true",
        help="overfit to a single protein in dataset (sanity check on architecture)",
    )
    # Hyphenated spelling added for consistency with every other flag; the
    # original --grad_accumulation spelling is kept as an alias, and the parsed
    # attribute name (args.grad_accumulation) is unchanged.
    parser.add_argument(
        "--grad-accumulation",
        "--grad_accumulation",
        default=1,
        type=int,
        help="number of gradient accumulation steps",
    )
    #############################
    ##### Negative sampling
    #############################
    parser.add_argument(
        "--uniform",
        default=False,
        action="store_true",
        help="uniform over all candidate bins in Dunbrack library "
        "as opposed to weighted based off empirical frequency",
    )
    parser.add_argument(
        "--weighted-gauss",
        default=False,
        action="store_true",
        help="given chi and psi angles, interpolate between nearby bins "
        "based off Gaussian with weighted sum of means/var with weights computed by distance",
    )
    parser.add_argument(
        "--gmm",
        default=False,
        action="store_true",
        help="given chi and psi angles, interpolate between nearby bins "
        "by sampling each nearby bin based off Gaussian with weights computed by distance",
    )
    parser.add_argument(
        "--chi-mean",
        default=False,
        action="store_true",
        help="instead of sampling from Gaussians from bins in the Dunbrack library "
        "just sample the mean of the bins",
    )
    return parser