in train_simple.py [0:0]
from typing import Optional


def main(
    batch_size: int = 32,
    max_ctx: int = 1024,
    ds_name: str = "sciq",
    loss: str = "xent",
    n_docs: int = 20000,
    n_test_docs: int = 10000,
    model_size: str = "gpt2",
    lr: Optional[float] = None,
    optim: Optional[str] = None,
    epochs: int = 2,
    force_retrain: bool = False,
    seed: int = 0,
    minibatch_size_per_device: Optional[int] = None,
    train_with_dropout: bool = False,
    results_folder: str = "/tmp/results",
    linear_probe: bool = False,
    lr_schedule: str = "cosine_anneal",
    # Note: you can pass either weak_model_size or weak_labels_path. If you pass
    # weak_model_size, we will guess the path to the weak labels from the weak
    # model's results. If you pass weak_labels_path, we will use that path
    # directly. If you pass neither, we will train on ground-truth labels (see
    # the path-resolution sketch after this signature).
    weak_model_size: Optional[str] = None,
    weak_labels_path: Optional[str] = None,
    sweep_subfolder: str = "default",
    # Set to a very large value so that by default we skip intermediate evals
    # but still run the final eval, which requires eval_every to be a non-zero,
    # non-None value (see the loop sketch below).
    eval_every: int = 1000000,
    sync_command: Optional[str] = None,
):
    ...  # function body omitted in this excerpt
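
To make the precedence between weak_model_size and weak_labels_path concrete, here is a minimal sketch of the resolution logic the comment describes. The helper name resolve_weak_labels_path and the results_folder/sweep_subfolder/<weak_model_size> layout are illustrative assumptions, not the repository's actual directory scheme.

import os
from typing import Optional


def resolve_weak_labels_path(
    results_folder: str,
    sweep_subfolder: str,
    weak_model_size: Optional[str],
    weak_labels_path: Optional[str],
) -> Optional[str]:
    # Hypothetical helper: an explicitly passed weak_labels_path wins.
    if weak_labels_path is not None:
        return weak_labels_path
    # Otherwise, guess the path from the weak model's results directory
    # (assumed layout; the real script may organize results differently).
    if weak_model_size is not None:
        return os.path.join(
            results_folder, sweep_subfolder, weak_model_size, "weak_labels"
        )
    # Neither was passed: the caller trains on ground-truth labels.
    return None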
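
The eval_every default encodes a simple pattern: run an eval every eval_every steps during training, and always run one final eval at the end. Below is a minimal sketch of that loop; evaluate and the per-step work are hypothetical stand-ins for the script's real training and evaluation code.

def evaluate(step: int) -> None:
    # Stand-in for the real test-set evaluation.
    print(f"evaluating at step {step}")


def train_loop(n_steps: int, eval_every: int = 1_000_000) -> None:
    for step in range(1, n_steps + 1):
        # ... one optimization step would run here ...
        # With the default eval_every = 1_000_000 this branch effectively
        # never fires, so no intermediate evals happen.
        if step % eval_every == 0 and step < n_steps:
            evaluate(step)
    # The final eval runs regardless of how large eval_every is.
    evaluate(n_steps)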
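
Assuming main is exposed through a keyword-argument CLI wrapper (for example fire.Fire(main), an assumption consistent with this signature style rather than something this excerpt shows), a weak-to-strong run could be launched as:

python train_simple.py --model_size gpt2-medium --weak_model_size gpt2 --ds_name sciq --results_folder /tmp/results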