in mdl.py [0:0]
def cli_main(args):
    """Entry point for MDL (Minimum Description Length) training.

    Extends the standard training argument parser with MDL-specific
    options, validates the environment, forces single-GPU /
    sentence-averaged training, and hands off to ``main``.

    Args:
        args: Raw command-line argument list passed through to
            ``options.parse_args_and_arch``.

    Raises:
        RuntimeError: If no CUDA device is available.
        ValueError: If ``--mdl-train-examples`` is not a positive value.
    """
    parser = options.get_training_parser()
    parser.add_argument("--mdl-block-size", type=int, default=1,
        help="Size of the transmitted block. Used when calculating description length")
    parser.add_argument("--mdl-batches-per-epoch", type=int, default=3000, help="Number of updates in per training")
    parser.add_argument("--mdl-batch-size", type=int, default=None, help="If set, specifies the number of examples sampled (with replacement) "
                "for each update of the learner. If not specified, all examples available at the step are used.")
    parser.add_argument("--mdl-train-examples", type=int, default=None, required=True,
        help="First `mdl-train-examples` lines in the training dataset are considered as initial training data (see README).")
    args = options.parse_args_and_arch(parser, input_args=args)
    # Explicit raises instead of `assert`: asserts are stripped under
    # `python -O`, which would silently skip these environment checks.
    if not torch.cuda.is_available():
        raise RuntimeError("MDL training requires a CUDA-capable GPU.")
    # `required=True` guarantees presence, but 0 must also be rejected.
    if not args.mdl_train_examples:
        raise ValueError("--mdl-train-examples must be a positive integer.")
    if not args.sentence_avg:
        print('Overriding --sentence-avg', file=sys.stderr)
        args.sentence_avg = True
    # override multi-gpu logic: MDL computation assumes a single worker
    args.distributed_world_size = 1
    main(args)