in sagemaker-spark-sdk/src/main/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/XGBoostSageMakerEstimator.scala [225:394]
def getRefreshLeaf: Int = $(refreshLeaf)
/** The type of boosting process to run. Can be "default" or "update".
* Default = "default"
*/
val processType: Param[String] = new Param(this, "process_type",
"The type of boosting process to run. Can be default or update.",
ParamValidators.inArray(Array("default", "update")))
def getProcessType: String = $(processType)
/** Controls the way that new nodes are added to the tree. Can be "depthwise" or "lossguide".
* Currently supported only if tree_method is set to hist.
* Default = "depthwise"
*/
val growPolicy: Param[String] = new Param(this, "grow_policy",
"Controls the way new nodes are added to the tree. Can be 'depthwise' or 'lossguide'.",
ParamValidators.inArray(Array("depthwise", "lossguide")))
def getGrowPolicy: String = $(growPolicy)
/** Maximum number of nodes to be added. Relevant only if grow_policy = lossguide. Must be >= 0.
* Default = 0
*/
val maxLeaves: IntParam = new IntParam(this, "max_leaves",
"Maximum number of nodes to be added. Only relevant for the 'lossguide' grow policy. " +
"Must be >= 0. ",
ParamValidators.gtEq(0))
def getMaxLeaves: Int = $(maxLeaves)
/** Maximum number of discrete bins to bucket continuous features. Used only if tree_method=hist.
* Default = 256
*/
val maxBin: IntParam = new IntParam(this, "max_bin",
"Maximum number of discrete bins to bucket continuous features. This is only used if " +
"'hist' is specified as tree_method. ", ParamValidators.gtEq(1))
def getMaxBin: Int = $(maxBin)
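// Illustrative sketch, not part of the original file: tuning the histogram tree booster
// with the params above. Assumes an `estimator: XGBoostSageMakerEstimator` in scope and
// the conventional `set*` setters for these params (defined elsewhere in this class).
//
//   estimator
//     .setGrowPolicy("lossguide") // leaf-wise growth; supported only with tree_method = "hist"
//     .setMaxLeaves(64)           // cap on added nodes; relevant only for "lossguide"
//     .setMaxBin(256)             // histogram bins used to bucket continuous features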
/* Dart Booster parameters */
/** Type of sampling algorithm. Can be "uniform" or "weighted".
* Default = "uniform"
*/
val sampleType: Param[String] = new Param(this, "sample_type",
"Type of sampling algorithm. Can be 'uniform' or 'weighted'. " +
"\"uniform\": dropped trees are selected uniformly." +
"\"weighted\": dropped trees are selected in proportion to weight.",
ParamValidators.inArray(Array("uniform", "weighted")))
def getSampleType: String = $(sampleType)
/** Type of normalization algorithm. Can be "tree" or "forest".
* Default = "tree"
*/
val normalizeType: Param[String] = new Param(this, "normalize_type",
"Type of normalization algorithm. Can be 'tree' or 'forest'." +
"\"tree\": new trees have the same weight of each of dropped trees." +
"\"forest\": new trees have the same weight of sum of dropped trees (forest).",
ParamValidators.inArray(Array("tree", "forest")))
def getNormalizeType: String = $(normalizeType)
/** Dropout rate (a fraction of previous trees to drop during the dropout). Must be in [0, 1].
* Default = 0.0
*/
val rateDrop: DoubleParam = new DoubleParam(this, "rate_drop",
"dropout rate (a fraction of previous trees to drop during the dropout). Must be in [0, 1]. ",
ParamValidators.inRange(0.0, 1.0))
def getRateDrop: Double = $(rateDrop)
/** Whether to drop at least one tree during the dropout.
* Default = 0
*/
val oneDrop: IntParam = new IntParam(this, "one_drop",
"whether to drop at least one tree during the dropout. ",
ParamValidators.inArray(Array(0, 1)))
def getOneDrop: Int = $(oneDrop)
/** Probability of skipping the dropout procedure during a boosting iteration. Must be in [0, 1].
* Default = 0.0
*/
val skipDrop: DoubleParam = new DoubleParam(this, "skip_drop",
"Probability of skipping the dropout procedure during a boosting iteration. Must be in [0, 1].",
ParamValidators.inRange(0.0, 1.0))
def getSkipDrop: Double = $(skipDrop)
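// Illustrative sketch, not part of the original file: a DART booster configuration that
// combines the dropout params above. Assumes an `estimator` in scope, the booster
// hyperparameter set to "dart", and the conventional `set*` setters.
//
//   estimator
//     .setSampleType("weighted")  // drop trees in proportion to their weight
//     .setNormalizeType("forest") // weight new trees like the sum of the dropped trees
//     .setRateDrop(0.1)           // drop 10% of previous trees in each dropout
//     .setOneDrop(1)              // always drop at least one tree
//     .setSkipDrop(0.5)           // skip the dropout on half of the boosting iterations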
/* Parameters for linear booster */
/** L2 regularization term on bias. Must be in [0, 1].
* Default = 0.0
*/
val lambdaBias: DoubleParam = new DoubleParam(this, "lambda_bias",
"L2 regularization term on bias. Must be in [0, 1].", ParamValidators.inRange(0, 1))
def getLambdaBias: Double = $(lambdaBias)
/* Parameters for Tweedie Regression */
/** Parameter that controls the variance of the Tweedie distribution. Must be in (1, 2).
* Default = 1.5
*/
val tweedieVariancePower: DoubleParam = new DoubleParam(this, "tweedie_variance_power",
"parameter that controls the variance of the Tweedie distribution. Must be in (1, 2).",
ParamValidators.inRange(1, 2, lowerInclusive = false, upperInclusive = false))
def getTweedieVariancePower: Double = $(tweedieVariancePower)
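// Illustrative sketch, not part of the original file: Tweedie regression, where
// tweedie_variance_power interpolates between Poisson-like (near 1) and gamma-like
// (near 2) behavior. Assumes an `estimator` in scope and the conventional `set*` setters.
//
//   estimator
//     .setObjective("reg:tweedie")
//     .setTweedieVariancePower(1.5)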
/* Learning task parameters */
/** Specifies the learning task and the corresponding learning objective.
* Default: "reg:linear"
*/
val objective: Param[String] = new Param(this, "objective", "Specifies the learning objective. " +
"\"reg:linear\" -- linear regression " +
"\"reg:logistic\" --logistic regression " +
"\"binary:logistic\" --logistic regression for binary classification, output is probability " +
"\"binary:logitraw\" --logistic regression for binary classification, output is score before" +
" logistic transformation " +
"\"count:poisson\" --poisson regression for count data, output mean of poisson distribution " +
"max_delta_step is set to 0.7 by default in poisson regression (used to safeguard " +
"optimization) " +
"\"multi:softmax\" --multiclass classification using the softmax objective. " +
"You also need to set num_class(number of classes)" +
"\"multi:softprob\" --same as softmax, but output a vector of ndata * nclass, which can be" +
" further reshaped to ndata, nclass matrix. The result contains predicted probability of each" +
" data point belonging to each class. " +
"\"rank:pairwise\" --set XGBoost to do ranking task by minimizing the pairwise loss " +
"\"reg:gamma\" --gamma regression with log-link. Output is a mean of gamma distribution. " +
"It might be useful, e.g., for modeling insurance claims severity, or for any outcome " +
"that might be gamma-distributed" +
"\"reg:tweedie\" --Tweedie regression with log-link. It might be useful, e.g., for " +
"modeling total loss in insurance, or for any outcome that might be Tweedie-distributed.",
ParamValidators.inArray(Array("reg:linear", "reg:logistic", "binary:logistic",
"binary:logistraw", "count:poisson", "multi:softmax", "multi:softprob",
"rank:pairwise", "reg:gamma", "reg:tweedie")))
def getObjective: String = $(objective)
/** Number of classes. Required when the objective is multi:softmax or multi:softprob.
* No default.
*/
val numClasses: IntParam = new IntParam(this, "num_class",
"Number of classes", ParamValidators.gtEq(1))
def getNumClasses: Int = $(numClasses)
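// Illustrative sketch, not part of the original file: softmax multiclass classification,
// where num_class must be set alongside a multi:* objective. Assumes an `estimator` in
// scope and the conventional `set*` setters.
//
//   estimator
//     .setObjective("multi:softmax")
//     .setNumClasses(10)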
/** The initial prediction score of all instances, global bias.
* Default = 0.5
*/
val baseScore: DoubleParam = new DoubleParam(this, "base_score",
"the initial prediction score of all instances, global bias")
def getBaseScore: Double = $(baseScore)
/** Evaluation metrics for validation data. A default metric will be assigned according to the
* objective (rmse for regression, error for classification, and map for ranking).
* Default according to objective
*/
val evalMetric: Param[String] = new Param(this, "eval_metric",
"Evaluation metrics for validation data. A default metric will be assigned according to " +
"objective (rmse for regression, and error for classification, mean average " +
"precision for ranking)",
ParamValidators.inArray(Array("rmse", "mae", "logloss", "error", "error@t", "merror",
"mlogloss", "auc", "ndcg", "map", "ndcg@n", "ndcg-", "ndcg@n-", "map-", "map@n-")))
def getEvalMetric: String = $(evalMetric)
/** Random number seed.
* Default = 0
*/
val seed: IntParam = new IntParam(this, "seed", "Random number seed.")
def getSeed: Int = $(seed)
/**
* Number of rounds for gradient boosting. Must be >= 1. Required.
*/
val numRound: IntParam = new IntParam(this, "num_round",
"Number of rounds. Must be >= 1. ", ParamValidators.gtEq(1))
def getNumRound: Int = $(numRound)
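// Illustrative sketch, not part of the original file: a minimal learning-task setup.
// num_round is required; the other values shown merely override the defaults documented
// above. Assumes an `estimator` in scope and the conventional `set*` setters.
//
//   estimator
//     .setNumRound(100)
//     .setObjective("binary:logistic")
//     .setEvalMetric("auc")
//     .setSeed(42)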
}
object XGBoostSageMakerEstimator {