tzrec/protos/optimizer.proto

syntax = "proto2";

package tzrec.protos;

message SparseOptimizer {
  oneof optimizer {
    FusedSGDOptimizer sgd_optimizer = 1;
    FusedAdagradOptimizer adagrad_optimizer = 2;
    FusedAdamOptimizer adam_optimizer = 3;
    FusedLarsSGDOptimizer lars_sgd_optimizer = 4;
    FusedLAMBOptimizer lamb_optimizer = 5;
    FusedPartialRowWiseLAMBOptimizer partial_rowwise_lamb_optimizer = 6;
    FusedPartialRowWiseAdamOptimizer partial_rowwise_adam_optimizer = 7;
    FusedRowWiseAdagradOptimizer rowwise_adagrad_optimizer = 8;
  }
  oneof learning_rate {
    ConstantLR constant_learning_rate = 101;
    ExponentialDecayLR exponential_decay_learning_rate = 102;
    ManualStepLR manual_step_learning_rate = 103;
  }
}

message DenseOptimizer {
  oneof optimizer {
    SGDOptimizer sgd_optimizer = 1;
    AdagradOptimizer adagrad_optimizer = 2;
    AdamOptimizer adam_optimizer = 3;
  }
  oneof learning_rate {
    ConstantLR constant_learning_rate = 101;
    ExponentialDecayLR exponential_decay_learning_rate = 102;
    ManualStepLR manual_step_learning_rate = 103;
  }
}
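// Usage sketch (illustrative, not part of the schema): each wrapper above
// selects exactly one entry from its `optimizer` oneof and one from its
// `learning_rate` oneof; the Fused* variants target sparse embedding
// parameters, the plain ones target dense parameters. Assuming the training
// pipeline config exposes `sparse_optimizer` and `dense_optimizer` fields
// (e.g. inside `train_config`), a text-format config might look like:
//
//   sparse_optimizer {
//     adagrad_optimizer {
//       lr: 0.001
//     }
//     constant_learning_rate {
//     }
//   }
//   dense_optimizer {
//     adam_optimizer {
//       lr: 0.001
//     }
//     constant_learning_rate {
//     }
//   }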
enum WeightDecayMode {
  NONE = 0;
  L2 = 1;
  DECOUPLE = 2;
}

message FusedSGDOptimizer {
  required float lr = 1 [default = 0.002];
  optional bool gradient_clipping = 2 [default = false];
  optional float max_gradient = 3 [default = 1.0];
}

message FusedAdagradOptimizer {
  required float lr = 1 [default = 0.002];
  optional bool gradient_clipping = 2 [default = false];
  optional float max_gradient = 3 [default = 1.0];
}

message FusedAdamOptimizer {
  required float lr = 1 [default = 0.002];
  optional float beta1 = 2 [default = 0.9];
  optional float beta2 = 3 [default = 0.999];
  optional float weight_decay = 4 [default = 0.0];
  optional bool gradient_clipping = 5 [default = false];
  optional float max_gradient = 6 [default = 1.0];
}

message FusedLarsSGDOptimizer {
  required float lr = 1 [default = 0.002];
  optional float momentum = 2 [default = 0.9];
  optional float weight_decay = 3 [default = 0.0];
  optional bool gradient_clipping = 4 [default = false];
  optional float max_gradient = 5 [default = 1.0];
}

message FusedLAMBOptimizer {
  required float lr = 1 [default = 0.002];
  optional float beta1 = 2 [default = 0.9];
  optional float beta2 = 3 [default = 0.999];
  optional float weight_decay = 4 [default = 0.0];
  optional bool gradient_clipping = 5 [default = false];
  optional float max_gradient = 6 [default = 1.0];
}

message FusedPartialRowWiseLAMBOptimizer {
  required float lr = 1 [default = 0.002];
  optional float beta1 = 2 [default = 0.9];
  optional float beta2 = 3 [default = 0.999];
  optional float weight_decay = 4 [default = 0.0];
  optional bool gradient_clipping = 5 [default = false];
  optional float max_gradient = 6 [default = 1.0];
}

message FusedPartialRowWiseAdamOptimizer {
  required float lr = 1 [default = 0.002];
  optional float beta1 = 2 [default = 0.9];
  optional float beta2 = 3 [default = 0.999];
  optional float weight_decay = 4 [default = 0.0];
  optional bool gradient_clipping = 5 [default = false];
  optional float max_gradient = 6 [default = 1.0];
}

message FusedRowWiseAdagradOptimizer {
  required float lr = 1 [default = 0.002];
  optional float weight_decay = 2 [default = 0.0];
  optional WeightDecayMode weight_decay_mode = 3 [default = NONE];
  optional bool gradient_clipping = 4 [default = false];
  optional float max_gradient = 5 [default = 1.0];
}

message SGDOptimizer {
  required float lr = 1 [default = 0.002];
  optional float momentum = 2 [default = 0.9];
  optional float weight_decay = 3 [default = 0.0];
}

message AdagradOptimizer {
  required float lr = 1 [default = 0.002];
  optional float weight_decay = 2 [default = 0.0];
}

message AdamOptimizer {
  required float lr = 1 [default = 0.002];
  optional float beta1 = 2 [default = 0.9];
  optional float beta2 = 3 [default = 0.999];
  optional float weight_decay = 4 [default = 0.0];
}

message ConstantLR {
}

message ExponentialDecayLR {
  // decay steps or epochs
  optional uint32 decay_size = 1;
  // decay rate
  optional float decay_factor = 2 [default = 0.95];
  // if true, decay the learning rate at discrete intervals
  optional bool staircase = 3 [default = true];
  // warmup start learning rate
  optional float warmup_learning_rate = 4 [default = 0.0];
  // warmup steps or epochs
  optional uint32 warmup_size = 5 [default = 0];
  // minimum learning rate
  optional float min_learning_rate = 6 [default = 0.0];
  // schedule by epoch or by step
  optional bool by_epoch = 7 [default = false];
}

message ManualStepLR {
  // a list of global steps or epochs at which to switch learning rates
  repeated uint32 schedule_sizes = 1;
  // a list of learning rates corresponding to the intervals
  repeated float learning_rates = 2;
  // whether to linearly interpolate learning rates for steps in
  // [0, schedule_sizes[0]]
  optional bool warmup = 3 [default = false];
  // schedule by epoch or by step
  optional bool by_epoch = 4 [default = false];
}
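// Schedule sketch (illustrative values; a minimal sketch of how the fields
// above compose, based only on the field comments in this file):
//
//   // decay by decay_factor every decay_size steps (staircase by default),
//   // after a warmup_size-step ramp from warmup_learning_rate up to the
//   // optimizer's lr, never dropping below min_learning_rate
//   exponential_decay_learning_rate {
//     decay_size: 10000
//     decay_factor: 0.9
//     warmup_learning_rate: 0.0001
//     warmup_size: 1000
//     min_learning_rate: 1e-6
//   }
//
//   // switch learning rates at the listed epochs
//   manual_step_learning_rate {
//     schedule_sizes: [2, 4, 6]
//     learning_rates: [0.001, 0.0005, 0.0001]
//     by_epoch: true
//   }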