in sagemaker-spark-sdk/src/main/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/XGBoostSageMakerEstimator.scala [46:212]
/** Returns the value set for `booster`, falling back to its default value. */
def getBooster: String = getOrDefault(booster)
/** Toggles silent mode for the training run.
  * Accepts 0 (print running messages) or 1 (silent mode).
  * Default = 0
  */
val silent: IntParam = {
  val description = "Whether in silent mode. " +
    "Can be 0 or 1. 0 means print running messages, 1 means silent mode."
  new IntParam(this, "silent", description, ParamValidators.inArray[Int](Array(0, 1)))
}

/** Returns the value set for `silent`, falling back to its default value. */
def getSilent: Int = getOrDefault(silent)
/** Count of parallel threads used to run xgboost. Must be >= 1.
  * Defaults to the maximum number of threads available.
  */
val nThread: IntParam = {
  val atLeastOne: Int => Boolean = ParamValidators.gtEq[Int](1)
  new IntParam(
    this,
    "nthread",
    "Number of parallel threads used to run xgboost. Must be >= 1. ",
    atLeastOne)
}

/** Returns the value set for `nThread`, falling back to its default value. */
def getNThread: Int = getOrDefault(nThread)
/* Booster Parameters */
/** Step size shrinkage used in update to prevent overfitting. After each boosting step, we can
  * directly get the weights of new features, and eta shrinks the feature weights to make
  * the boosting process more conservative. Must be in [0, 1] (both bounds inclusive).
  * Default = 0.3
  */
val eta: DoubleParam = new DoubleParam(this, "eta",
  // The original text read "new features. and eta shrinks": a stray period split the sentence.
  "Step size shrinkage used in update to prevent overfitting. After each boosting step, " +
  "we can directly get the weights of new features, and eta shrinks the feature " +
  "weights to make the boosting process more conservative. Must be in [0, 1]. ",
  ParamValidators.inRange(0, 1))

/** Returns the value set for `eta`, falling back to its default value. */
def getEta: Double = $(eta)
/** Smallest loss reduction that justifies a further partition on a leaf node of the tree.
  * Larger values make the algorithm more conservative. Must be >= 0.
  * Default = 0
  */
val gamma: DoubleParam = {
  val description = "Minimum loss reduction required to make an additional partition " +
    "on a leaf node of the tree. The larger the value, the more conservative " +
    "the algorithm will be. Must be >= 0."
  new DoubleParam(this, "gamma", description, ParamValidators.gtEq[Double](0))
}

/** Returns the value set for `gamma`, falling back to its default value. */
def getGamma: Double = getOrDefault(gamma)
/** Upper bound on the depth of a tree. Increasing this value makes the model more complex
  * (and more likely to overfit). 0 indicates no limit; a limit is required when
  * grow_policy=depth-wise. Must be >= 0.
  * Default = 6
  */
val maxDepth: IntParam = {
  // The leading and trailing spaces are part of the published description; keep them as-is.
  val description =
    " Maximum depth of a tree, increase this value will make the model more complex " +
      "(likely to be overfitting). 0 indicates no limit, " +
      "limit is required when grow_policy=depth-wise. Must be >= 0. "
  new IntParam(this, "max_depth", description, ParamValidators.gtEq[Int](0))
}

/** Returns the value set for `maxDepth`, falling back to its default value. */
def getMaxDepth: Int = getOrDefault(maxDepth)
/** Minimum sum of instance weight (hessian) needed in a child. If a tree partition step yields
  * a leaf node whose sum of instance weight is below min_child_weight, the building process
  * gives up further partitioning. In linear regression mode this simply corresponds to the
  * minimum number of instances needed in each node. The larger the value, the more
  * conservative the algorithm will be. Must be >= 0.
  * Default = 1
  */
val minChildWeight: DoubleParam = {
  val description = "Minimum sum of instance weight (hessian) needed in a child. " +
    "If the tree partition step results in a leaf node with the sum of instance weight " +
    "less than min_child_weight, then the building process will give up further partitioning. " +
    "In linear regression mode, this simply corresponds to minimum number of instances " +
    "needed to be in each node. " +
    "The larger the value, the more conservative the algorithm will be. Must be >= 0."
  new DoubleParam(this, "min_child_weight", description, ParamValidators.gtEq[Double](0))
}

/** Returns the value set for `minChildWeight`, falling back to its default value. */
def getMinChildWeight: Double = getOrDefault(minChildWeight)
/** Maximum delta step allowed for each tree's weight estimation. A value of 0 means there is
  * no constraint; a positive value can help make the update step more conservative. It is
  * usually not needed, but might help in logistic regression when the classes are extremely
  * imbalanced, where a value of 1-10 might help control the update. Must be >= 0.
  * Default = 0
  */
val maxDeltaStep: DoubleParam = {
  val description = "Maximum delta step allowed for each tree's weight estimation to be. " +
    "If the value is set to 0, it means there is no constraint. " +
    "If it is set to a positive value, it can help make the update step more conservative. " +
    "Usually this parameter is not needed, but it might help in logistic regression " +
    "when the classes are extremely imbalanced. " +
    "Setting it to value of 1-10 might help control the update. Must be >= 0."
  new DoubleParam(this, "max_delta_step", description, ParamValidators.gtEq[Double](0))
}

/** Returns the value set for `maxDeltaStep`, falling back to its default value. */
def getMaxDeltaStep: Double = getOrDefault(maxDeltaStep)
/** Subsample ratio of the training instance. Setting it to 0.5 means that XGBoost will randomly
  * collect half of the data instances to grow trees, and this will prevent overfitting.
  * Must be in (0, 1]: 0 is excluded, 1 is allowed.
  * Default = 1
  */
val subsample: DoubleParam = new DoubleParam(this, "subsample",
  "Subsample ratio of the training instance. Setting it to 0.5 means that XGBoost will " +
  // Trailing space added: the sentences previously ran together as "overfitting.Must be".
  "randomly collect half of the data instances to grow trees and this will prevent overfitting. " +
  "Must be in (0, 1]. ",
  ParamValidators.inRange(0, 1, lowerInclusive = false, upperInclusive = true))

/** Returns the value set for `subsample`, falling back to its default value. */
def getSubsample: Double = $(subsample)
/** Fraction of columns sampled when constructing each tree.
  * Must be in (0, 1]: 0 is excluded, 1 is allowed.
  * Default = 1
  */
val colSampleByTree: DoubleParam = {
  val inHalfOpenUnitInterval: Double => Boolean =
    ParamValidators.inRange[Double](0, 1, lowerInclusive = false, upperInclusive = true)
  new DoubleParam(
    this,
    "colsample_bytree",
    "Subsample ratio of columns when constructing each tree. Must be in (0, 1]",
    inHalfOpenUnitInterval)
}

/** Returns the value set for `colSampleByTree`, falling back to its default value. */
def getColSampleByTree: Double = getOrDefault(colSampleByTree)
/** Fraction of columns sampled for each split, in each level.
  * Must be in (0, 1]: 0 is excluded, 1 is allowed.
  * Default = 1
  */
val colSampleByLevel: DoubleParam = {
  val inHalfOpenUnitInterval: Double => Boolean =
    ParamValidators.inRange[Double](0, 1, lowerInclusive = false, upperInclusive = true)
  new DoubleParam(
    this,
    "colsample_bylevel",
    "Subsample ratio of columns for each split, in each level. Must be in (0, 1].",
    inHalfOpenUnitInterval)
}

/** Returns the value set for `colSampleByLevel`, falling back to its default value. */
def getColSampleByLevel: Double = getOrDefault(colSampleByLevel)
/** L2 regularization term on weights. Increasing this value makes the model more conservative.
  * No validator is attached to this param.
  * Default = 1
  */
val lambda: DoubleParam = {
  val description = "L2 regularization term on weights, " +
    "increase this value will make model more conservative."
  new DoubleParam(this, "lambda", description)
}

/** Returns the value set for `lambda`, falling back to its default value. */
def getLambda: Double = getOrDefault(lambda)
/** L1 regularization term on weights. Increasing this value makes the model more conservative.
  * No validator is attached to this param.
  * Default = 0
  */
val alpha: DoubleParam = {
  val description = "L1 regularization term on weights, " +
    "increase this value will make model more conservative."
  new DoubleParam(this, "alpha", description)
}

/** Returns the value set for `alpha`, falling back to its default value. */
def getAlpha: Double = getOrDefault(alpha)
/** Tree construction algorithm used in XGBoost.
  * Accepted values: auto, exact, approx, hist.
  * Default = "auto"
  */
val treeMethod: Param[String] = {
  val allowedMethods = Array("auto", "exact", "approx", "hist")
  new Param[String](
    this,
    "tree_method",
    "The tree construction algorithm used in XGBoost. Can be auto, exact, approx, hist.",
    ParamValidators.inArray[String](allowedMethods))
}

/** Returns the value set for `treeMethod`, falling back to its default value. */
def getTreeMethod: String = getOrDefault(treeMethod)
/** Used only for the approximate greedy algorithm. Translates into O(1 / sketch_eps) number of
  * bins. Compared to directly selecting the number of bins, this comes with a theoretical
  * guarantee on sketch accuracy. Must be in (0, 1), both bounds excluded.
  * Default = 0.03
  */
val sketchEps: DoubleParam = {
  val description = "Used only for approximate greedy algorithm. " +
    "Translates into O(1 / sketch_eps) number of bins. " +
    "Compared to directly select number of bins, this comes with theoretical guarantee " +
    "with sketch accuracy. Must be in (0, 1). "
  val inOpenUnitInterval: Double => Boolean =
    ParamValidators.inRange[Double](0, 1, lowerInclusive = false, upperInclusive = false)
  new DoubleParam(this, "sketch_eps", description, inOpenUnitInterval)
}

/** Returns the value set for `sketchEps`, falling back to its default value. */
def getSketchEps: Double = getOrDefault(sketchEps)
/** Factor by which the weight of positive examples is scaled, controlling the balance of
  * positive and negative weights. Useful for unbalanced classes.
  * A typical value to consider: sum(negative cases) / sum(positive cases).
  * No validator is attached to this param.
  * Default = 1
  */
val scalePosWeight: DoubleParam = {
  val description = "Scale the weight of positive examples by this factor. " +
    "Useful for unbalanced classes"
  new DoubleParam(this, "scale_pos_weight", description)
}

/** Returns the value set for `scalePosWeight`, falling back to its default value. */
def getScalePosWeight: Double = getOrDefault(scalePosWeight)
/** Comma-separated string naming the sequence of tree updaters to run, which gives a modular
  * way to construct and to modify the trees. Values are checked by `updaterValidator`.
  * Default = "grow_colmaker,prune"
  */
val updater: Param[String] = {
  val description = "A comma separated string defining the sequence of tree updaters to run, " +
    "providing a modular way to construct and to modify the trees. " +
    "This is an advanced parameter that is usually set automatically, " +
    "depending on some other parameters"
  new Param[String](this, "updater", description, updaterValidator)
}

/** Returns the value set for `updater`, falling back to its default value. */
def getUpdater: String = getOrDefault(updater)
/** Builds the validation predicate for the `updater` param.
  *
  * The candidate string is split on commas, and every token (after trimming surrounding
  * whitespace) must be contained in `updaterValues`.
  *
  * `String.split` drops trailing empty strings, so an input made only of separators
  * (for example ",") produces no tokens at all. Such input is rejected with `false`
  * instead of failing with an exception from reducing an empty array.
  *
  * @return a function that is true only for a non-empty list of known updater names
  */
private def updaterValidator: String => Boolean = { (value: String) =>
  val tokens = value.split(",")
  tokens.nonEmpty && tokens.forall(token => updaterValues.contains(token.trim))
}