in scripts/reader/train.py
def add_train_args(parser):
"""Adds commandline arguments pertaining to training a model. These
are different from the arguments dictating the model architecture.
"""
parser.register('type', 'bool', str2bool)
# Runtime environment
runtime = parser.add_argument_group('Environment')
runtime.add_argument('--no-cuda', type='bool', default=False,
help='Train on CPU, even if GPUs are available.')
runtime.add_argument('--gpu', type=int, default=-1,
help='Run on a specific GPU')
runtime.add_argument('--data-workers', type=int, default=5,
help='Number of subprocesses for data loading')
runtime.add_argument('--parallel', type='bool', default=False,
help='Use DataParallel on all available GPUs')
runtime.add_argument('--random-seed', type=int, default=1013,
help=('Random seed for all numpy/torch/cuda '
'operations (for reproducibility)'))
    runtime.add_argument('--num-epochs', type=int, default=40,
                         help='Number of full passes over the training data')
    runtime.add_argument('--batch-size', type=int, default=32,
                         help='Batch size for training')
    runtime.add_argument('--test-batch-size', type=int, default=128,
                         help='Batch size during validation/testing')

    # Files
    files = parser.add_argument_group('Filesystem')
    files.add_argument('--model-dir', type=str, default=MODEL_DIR,
                       help='Directory for saved models/checkpoints/logs')
    files.add_argument('--model-name', type=str, default='',
                       help='Unique model identifier (.mdl, .txt, .checkpoint)')
    files.add_argument('--data-dir', type=str, default=DATA_DIR,
                       help='Directory of training/validation data')
    files.add_argument('--train-file', type=str,
                       default='SQuAD-v1.1-train-processed-corenlp.txt',
                       help='Preprocessed train file')
    files.add_argument('--dev-file', type=str,
                       default='SQuAD-v1.1-dev-processed-corenlp.txt',
                       help='Preprocessed dev file')
    files.add_argument('--dev-json', type=str, default='SQuAD-v1.1-dev.json',
                       help=('Unprocessed dev file to run validation on '
                             'while training'))
    files.add_argument('--embed-dir', type=str, default=EMBED_DIR,
                       help='Directory of pre-trained embedding files')
    files.add_argument('--embedding-file', type=str,
                       default='glove.840B.300d.txt',
                       help='Space-separated pretrained embeddings file')

    # Saving + loading
    save_load = parser.add_argument_group('Saving/Loading')
    save_load.add_argument('--checkpoint', type='bool', default=False,
                           help='Save model + optimizer state after each epoch')
    save_load.add_argument('--pretrained', type=str, default='',
                           help='Path to a pretrained model to warm-start with')
    save_load.add_argument('--expand-dictionary', type='bool', default=False,
                           help=('Expand dictionary of pretrained model to '
                                 'include training/dev words of new data'))

    # Data preprocessing
    preprocess = parser.add_argument_group('Preprocessing')
    preprocess.add_argument('--uncased-question', type='bool', default=False,
                            help='Question words will be lower-cased')
    preprocess.add_argument('--uncased-doc', type='bool', default=False,
                            help='Document words will be lower-cased')
    preprocess.add_argument('--restrict-vocab', type='bool', default=True,
                            help='Only use pre-trained words in embedding_file')

    # General
    general = parser.add_argument_group('General')
    general.add_argument('--official-eval', type='bool', default=True,
                         help='Validate with official SQuAD eval')
    general.add_argument('--valid-metric', type=str, default='f1',
                         help='The evaluation metric used for model selection')
    general.add_argument('--display-iter', type=int, default=25,
                         help='Log state after every <display_iter> batches')
    general.add_argument('--sort-by-len', type='bool', default=True,
                         help='Sort batches by length for speed')
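
Usage sketch (illustrative, not part of the excerpt above): the snippet below uses hypothetical placeholder values for MODEL_DIR, DATA_DIR, and EMBED_DIR and a minimal str2bool helper, since the real definitions live elsewhere in train.py; it only shows how the registered 'bool' type and add_train_args() are typically attached to a standard argparse.ArgumentParser.

import argparse

# Hypothetical placeholders; the real train.py defines these directories
# from its own configuration.
MODEL_DIR = '/tmp/reader/models'
DATA_DIR = '/tmp/reader/data'
EMBED_DIR = '/tmp/reader/embeddings'


def str2bool(v):
    # Minimal string -> bool converter backing the registered 'bool' type;
    # the helper in the real file may differ in detail.
    return str(v).lower() in ('yes', 'true', 't', '1', 'y')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    add_train_args(parser)
    args = parser.parse_args()
    print('Training for %d epochs with batch size %d' %
          (args.num_epochs, args.batch_size))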