in c2/tools/train_net.py [0:0]
import argparse
import logging

# Minimal logging setup so that log.info() below works when this snippet runs
# standalone; the original file configures its own logger. model_builder and
# Train are defined/imported elsewhere in this file.
logging.basicConfig()
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)


def main():
    # TODO: use argv
    parser = argparse.ArgumentParser(
        description="Caffe2: simple video training"
    )
parser.add_argument("--model_name", type=str, default='r2plus1d',
help="Name of the model")
parser.add_argument("--model_depth", type=int, default=18,
help="Depth of the model")
parser.add_argument("--train_data", type=str, default=None,
help="Path to train data",
required=True)
parser.add_argument("--test_data", type=str, default=None,
help="Path to test data")
parser.add_argument("--db_type", type=str, default="minidb",
help="Database type to save the training model")
parser.add_argument("--gpus", type=str,
help="Comma separated list of GPU devices to use")
parser.add_argument("--num_gpus", type=int, default=1,
help="Number of GPU devices (instead of --gpus)")
parser.add_argument("--scale_h", type=int, default=128,
help="Scale image height to")
parser.add_argument("--scale_w", type=int, default=171,
help="Scale image width to")
parser.add_argument("--crop_size", type=int, default=112,
help="Input image size (to crop to)")
parser.add_argument("--num_decode_threads", type=int, default=4,
help="# of threads/GPU dedicated for video decoding")
parser.add_argument("--clip_length_rgb", type=int, default=16,
help="Length of input clips")
parser.add_argument("--sampling_rate_rgb", type=int, default=1,
help="Frame sampling rate")
parser.add_argument("--num_labels", type=int, default=101,
help="Number of labels")
parser.add_argument("--num_channels", type=int, default=3,
help="Number of channels")
parser.add_argument("--clip_length_of", type=int, default=8,
help="Frames of optical flow data")
parser.add_argument("--sampling_rate_of", type=int, default=2,
help="")
parser.add_argument("--frame_gap_of", type=int, default=2,
help="")
parser.add_argument("--input_type", type=int, default=0,
help="False=rgb, True=optical flow")
parser.add_argument("--flow_data_type", type=int, default=0,
help="0=Flow2C, 1=Flow3C, 2=FlowWithGray, 3=FlowWithRGB")
parser.add_argument("--do_flow_aggregation", type=int, default=0,
help="whether to aggregate optical flow across "
+ "multiple frames")
parser.add_argument("--get_video_id", type=int, default=0,
help="Output video id")
parser.add_argument("--batch_size", type=int, default=32,
help="Batch size, total over all GPUs")
parser.add_argument("--epoch_size", type=int, default=110000,
help="Number of videos/epoch, total over all machines")
parser.add_argument("--num_epochs", type=int, default=50,
help="Num epochs.")
parser.add_argument("--base_learning_rate", type=float, default=0.003,
help="Initial learning rate.")
parser.add_argument("--step_epoch", type=int, default=10,
help="Reducing learning rate every step_epoch.")
parser.add_argument("--gamma", type=float, default=0.1,
help="Learning rate decay factor.")
parser.add_argument("--display_iter", type=int, default=10,
help="Display information every # of iterations.")
parser.add_argument("--weight_decay", type=float, default=0.005,
help="Weight decay (L2 regularization)")
parser.add_argument("--cudnn_workspace_limit_mb", type=int, default=64,
help="CuDNN workspace limit in MBs")
parser.add_argument("--file_store_path", type=str, default="/tmp",
help="Path to directory to use for saving checkpoints")
parser.add_argument("--save_model_name", type=str, default="simple_c3d",
help="Save the trained model to a given name")
parser.add_argument("--load_model_path", type=str, default=None,
help="Load previously saved model to continue training")
parser.add_argument("--use_cudnn", type=int, default=1,
help="Use CuDNN")
parser.add_argument("--profiling", type=int, default=0,
help="Profile training time")
parser.add_argument("--pred_layer_name", type=str, default=None,
help="the prediction layer name")
parser.add_argument("--multi_label", type=int, default=0,
help="Multiple label training")
parser.add_argument("--channel_multiplier", type=float, default=1.0,
help="Channel multiplier")
parser.add_argument("--bottleneck_multiplier", type=float, default=1.0,
help="Bottleneck multiplier")
parser.add_argument("--use_dropout", type=int, default=0,
help="Use dropout at the prediction layer")
parser.add_argument("--conv1_temporal_stride", type=int, default=1,
help="Conv1 temporal striding")
parser.add_argument("--conv1_temporal_kernel", type=int, default=3,
help="Conv1 temporal kernel")
parser.add_argument("--video_res_type", type=int, default=1,
help="Video frame scaling option, 0: scaled by "
+ "height x width; 1: scaled by short edge")
parser.add_argument("--use_pool1", type=int, default=0,
help="use pool1 layer")
parser.add_argument("--jitter_scales", type=str, default="128,160", required=True,
help="spatial scales jitter, separated by commas")
parser.add_argument("--use_local_file", type=int, default=0,
help="use local file")
parser.add_argument("--is_checkpoint", type=int, default=1,
help="0: pretrained_model is used as initalization"
+ "1: pretrained_model is used as a checkpoint")
parser.add_argument("--audio_input_3d", type=int, default=0,
help="is audio input 3d or 2d; 0 for 2d")
parser.add_argument("--g_blend", type=int, default=0,
help="use gradient-blending to train model")
parser.add_argument("--audio_weight", type=float, default=0.0,
help="g_blend weights for audio head")
parser.add_argument("--visual_weight", type=float, default=0.0,
help="g_blend weights for visual head")
parser.add_argument("--av_weight", type=float, default=1.0,
help="g_blend weights for av head")
args = parser.parse_args()
log.info(args)
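    # Sanity-check that the chosen architecture supports the requested clip
    # length (optical flow or RGB, depending on --input_type) and crop size
    # before starting training.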
assert model_builder.model_validation(
args.model_name,
args.model_depth,
args.clip_length_of if args.input_type else args.clip_length_rgb,
args.crop_size
)
Train(args)
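
# Illustrative invocation (paths and device ids are placeholders):
#   python tools/train_net.py \
#       --train_data /path/to/train_db \
#       --test_data /path/to/test_db \
#       --model_name r2plus1d --model_depth 18 \
#       --gpus 0,1 --batch_size 32 --num_labels 101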