# Excerpt: def main() — from c2/tools/train_net.py

def main(argv=None):
    """Entry point for the Caffe2 simple video-training script.

    Parses command-line options, validates the requested model
    configuration via ``model_builder.model_validation``, then launches
    training through ``Train``.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]`` (resolves the old ``TODO: use argv``).
            Passing ``None`` keeps the previous behavior of reading the
            process arguments directly.
    """
    parser = argparse.ArgumentParser(
        description="Caffe2: simple video training"
    )
    # --- Model selection ---
    parser.add_argument("--model_name", type=str, default='r2plus1d',
                        help="Name of the model")
    parser.add_argument("--model_depth", type=int, default=18,
                        help="Depth of the model")
    # --- Data sources ---
    # NOTE: default=None is redundant with required=True (argparse ignores
    # the default for required options) but is kept for clarity.
    parser.add_argument("--train_data", type=str, default=None,
                        help="Path to train data",
                        required=True)
    parser.add_argument("--test_data", type=str, default=None,
                        help="Path to test data")
    parser.add_argument("--db_type", type=str, default="minidb",
                        help="Database type to save the training model")
    # --- Device selection: either an explicit GPU list or a count ---
    parser.add_argument("--gpus", type=str,
                        help="Comma separated list of GPU devices to use")
    parser.add_argument("--num_gpus", type=int, default=1,
                        help="Number of GPU devices (instead of --gpus)")
    # --- Input geometry / decoding ---
    parser.add_argument("--scale_h", type=int, default=128,
                        help="Scale image height to")
    parser.add_argument("--scale_w", type=int, default=171,
                        help="Scale image width to")
    parser.add_argument("--crop_size", type=int, default=112,
                        help="Input image size (to crop to)")
    parser.add_argument("--num_decode_threads", type=int, default=4,
                        help="# of threads/GPU dedicated for video decoding")
    parser.add_argument("--clip_length_rgb", type=int, default=16,
                        help="Length of input clips")
    parser.add_argument("--sampling_rate_rgb", type=int, default=1,
                        help="Frame sampling rate")
    parser.add_argument("--num_labels", type=int, default=101,
                        help="Number of labels")
    parser.add_argument("--num_channels", type=int, default=3,
                        help="Number of channels")
    # --- Optical-flow input options ---
    parser.add_argument("--clip_length_of", type=int, default=8,
                        help="Frames of optical flow data")
    parser.add_argument("--sampling_rate_of", type=int, default=2,
                        help="Frame sampling rate for optical flow input")
    parser.add_argument("--frame_gap_of", type=int, default=2,
                        help="Frame gap used when computing optical flow")
    parser.add_argument("--input_type", type=int, default=0,
                        help="False=rgb, True=optical flow")
    parser.add_argument("--flow_data_type", type=int, default=0,
                        help="0=Flow2C, 1=Flow3C, 2=FlowWithGray, 3=FlowWithRGB")
    parser.add_argument("--do_flow_aggregation", type=int, default=0,
                        help="whether to aggregate optical flow across "
                        + "multiple frames")
    parser.add_argument("--get_video_id", type=int, default=0,
                        help="Output video id")
    # --- Optimization schedule ---
    parser.add_argument("--batch_size", type=int, default=32,
                        help="Batch size, total over all GPUs")
    parser.add_argument("--epoch_size", type=int, default=110000,
                        help="Number of videos/epoch, total over all machines")
    parser.add_argument("--num_epochs", type=int, default=50,
                        help="Num epochs.")
    parser.add_argument("--base_learning_rate", type=float, default=0.003,
                        help="Initial learning rate.")
    parser.add_argument("--step_epoch", type=int, default=10,
                        help="Reducing learning rate every step_epoch.")
    parser.add_argument("--gamma", type=float, default=0.1,
                        help="Learning rate decay factor.")
    parser.add_argument("--display_iter", type=int, default=10,
                        help="Display information every # of iterations.")
    parser.add_argument("--weight_decay", type=float, default=0.005,
                        help="Weight decay (L2 regularization)")
    parser.add_argument("--cudnn_workspace_limit_mb", type=int, default=64,
                        help="CuDNN workspace limit in MBs")
    # --- Checkpointing / model I/O ---
    parser.add_argument("--file_store_path", type=str, default="/tmp",
                        help="Path to directory to use for saving checkpoints")
    parser.add_argument("--save_model_name", type=str, default="simple_c3d",
                        help="Save the trained model to a given name")
    parser.add_argument("--load_model_path", type=str, default=None,
                        help="Load previously saved model to continue training")
    parser.add_argument("--use_cudnn", type=int, default=1,
                        help="Use CuDNN")
    parser.add_argument("--profiling", type=int, default=0,
                        help="Profile training time")
    parser.add_argument("--pred_layer_name", type=str, default=None,
                        help="the prediction layer name")
    parser.add_argument("--multi_label", type=int, default=0,
                        help="Multiple label training")
    # --- Architecture tweaks ---
    parser.add_argument("--channel_multiplier", type=float, default=1.0,
                        help="Channel multiplier")
    parser.add_argument("--bottleneck_multiplier", type=float, default=1.0,
                        help="Bottleneck multiplier")
    parser.add_argument("--use_dropout", type=int, default=0,
                        help="Use dropout at the prediction layer")
    parser.add_argument("--conv1_temporal_stride", type=int, default=1,
                        help="Conv1 temporal striding")
    parser.add_argument("--conv1_temporal_kernel", type=int, default=3,
                        help="Conv1 temporal kernel")
    parser.add_argument("--video_res_type", type=int, default=1,
                        help="Video frame scaling option, 0: scaled by "
                        + "height x width; 1: scaled by short edge")
    parser.add_argument("--use_pool1", type=int, default=0,
                        help="use pool1 layer")
    # NOTE(review): required=True combined with a default is contradictory —
    # argparse ignores the default when the option is required. Behavior is
    # kept as-is; confirm whether this flag should really be mandatory.
    parser.add_argument("--jitter_scales", type=str, default="128,160", required=True,
                        help="spatial scales jitter, separated by commas")
    parser.add_argument("--use_local_file", type=int, default=0,
                        help="use local file")
    parser.add_argument("--is_checkpoint", type=int, default=1,
                        help="0: pretrained_model is used as initalization"
                        + "1: pretrained_model is used as a checkpoint")
    # --- Audio-visual / gradient-blending options ---
    parser.add_argument("--audio_input_3d", type=int, default=0,
                        help="is audio input 3d or 2d; 0 for 2d")
    parser.add_argument("--g_blend", type=int, default=0,
                        help="use gradient-blending to train model")
    parser.add_argument("--audio_weight", type=float, default=0.0,
                        help="g_blend weights for audio head")
    parser.add_argument("--visual_weight", type=float, default=0.0,
                        help="g_blend weights for visual head")
    parser.add_argument("--av_weight", type=float, default=1.0,
                        help="g_blend weights for av head")
    # argv=None falls back to sys.argv[1:], preserving the old behavior.
    args = parser.parse_args(argv)

    log.info(args)

    # Clip length depends on the input modality: optical flow vs. RGB.
    # NOTE(review): assert is stripped under `python -O`; validation would be
    # more robust as an explicit check + raise, but is kept to preserve the
    # original failure mode.
    assert model_builder.model_validation(
        args.model_name,
        args.model_depth,
        args.clip_length_of if args.input_type else args.clip_length_rgb,
        args.crop_size
    )

    Train(args)