in sagemaker-spark-sdk/src/main/scala/com/amazonaws/services/sagemaker/sparksdk/algorithms/KMeansSageMakerEstimator.scala [45:137]
/** Accessor for the `k` param: returns whatever `$(k)` resolves to. */
def getK: Int = {
  $(k)
}
/**
 * Algorithm used to pick the initial centroids.
 * Accepted values: "random" or "kmeans++".
 * Default: "random".
 */
val initMethod: Param[String] = {
  val supportedMethods = Array("random", "kmeans++")
  new Param[String](
    this,
    "init_method",
    "The initialization algorithm to choose centroids. " +
      "Supported options: 'random' and 'kmeans++'.",
    ParamValidators.inArray[String](supportedMethods))
}

/** Accessor for [[initMethod]]. */
def getInitMethod: String = $(initMethod)
/**
 * Maximum iterations for Lloyds EM procedure in the local kmeans used in finalized stage.
 * Must be > 0 (the validator accepts any value >= 1).
 * Default: 300.
 */
val maxIter : IntParam = new IntParam(this, "local_lloyd_max_iter",
  // The trailing space keeps the two concatenated fragments from fusing into "procedurein".
  "Maximum iterations for Lloyds EM procedure " +
  "in the local kmeans used in finalized stage. Must be > 0", ParamValidators.gtEq(1))

/** Accessor for [[maxIter]]. */
def getMaxIter: Int = $(maxIter)
/**
 * Early-stopping tolerance on the change in ssd for the local kmeans.
 * Accepted range: [0, 1] (both bounds included).
 * Default: 0.0001.
 */
val tol: DoubleParam = new DoubleParam(
  this,
  "local_lloyd_tol",
  "Tolerance for change in ssd for early stopping in local kmeans. " +
    "Must be in range [0, 1].",
  ParamValidators.inRange[Double](0.0, 1.0))

/** Accessor for [[tol]]. */
def getTol: Double = $(tol)
/**
 * How many times the local kmeans algorithm is run; the output with the best loss is kept.
 * Accepted values: a number > 0, or "auto".
 * Default: "auto".
 */
val trialNum: Param[String] = new Param[String](
  this,
  "local_lloyd_num_trials",
  "The number of trials of the local kmeans algorithm. " +
    "Must be > 0 or 'auto'.",
  autoOrAboveParamValidator(0, false))

/** Accessor for [[trialNum]]. */
def getTrialNum: String = $(trialNum)
/**
 * Algorithm used by the local kmeans to pick its initial centroids.
 * Accepted values: "random" or "kmeans++".
 * Default: "kmeans++".
 */
val localInitMethod: Param[String] = {
  val supportedMethods = Array("random", "kmeans++")
  new Param[String](
    this,
    "local_lloyd_init_method",
    "The local initialization algorithm to choose centroids. " +
      "Supported options: 'random' and 'kmeans++'",
    ParamValidators.inArray[String](supportedMethods))
}

/** Accessor for [[localInitMethod]]. */
def getLocalInitMethod: String = $(localInitMethod)
/**
 * Rate at which the weight of each point decays; 0 means no decay at all.
 * Must be >= 0.
 * Default: 0.
 */
val halflifeTime: IntParam = new IntParam(
  this,
  "half_life_time_size",
  "The weight decaying rate of each point. Must be >= 0.",
  ParamValidators.gtEq[Int](0))

/** Accessor for [[halflifeTime]]. */
def getHalflifeTime: Int = $(halflifeTime)
/**
 * Number of passes made over the training data.
 * Must be > 0 (the validator accepts any value >= 1).
 * Default: 1.
 */
val epochs: IntParam = new IntParam(
  this,
  "epochs",
  "The number of passes done over the training data. Must be > 0.",
  ParamValidators.gtEq[Int](1))

/** Accessor for [[epochs]]. */
def getEpochs: Int = $(epochs)
/**
 * Factor of extra centroids to create: the number of initial centroids equals
 * centerFactor * k.
 * Accepted values: a number > 0, or "auto".
 * Default: "auto".
 */
val centerFactor: Param[String] = new Param[String](
  this,
  "extra_center_factor",
  "The factor of extra centroids to create. Must be > 0 or 'auto'",
  autoOrAboveParamValidator(0, false))

/** Accessor for [[centerFactor]]. */
def getCenterFactor: String = $(centerFactor)
/**
 * Metric to be used for scoring the model, given as a string of comma separated metrics.
 * The value may be wrapped in square brackets; [[getEvalMetrics]] strips them.
 * Supported metrics are "msd" and "ssd".
 * "msd": Means Square Error, "ssd": Sum of square distance.
 * Default: "msd".
 */
val evalMetrics: Param[String] = new Param(this, "eval_metrics",
  // Each fragment ends with a separator so the concatenated help text stays readable; the
  // unrelated trailing clause that used to be fused onto "distance" has been removed.
  "Metric to be used for scoring the model. String of comma separated metrics. " +
  "Support metrics are 'msd' and 'ssd'. " +
  "'msd' Means Square Error, 'ssd': Sum of square distance.", evalMetricsValidator)

/**
 * Returns the configured metrics string with one leading "[" and one trailing "]" removed
 * when present.
 */
def getEvalMetrics: String = $(evalMetrics).stripPrefix("[").stripSuffix("]")
/**
 * Validator for the eval metrics param value.
 *
 * Strips one optional leading "[" and one optional trailing "]", splits the remainder on
 * commas, and accepts the value only when at least one entry exists and every trimmed entry
 * is contained in `evalMetricsValues`.
 */
private def evalMetricsValidator: String => Boolean = {
  (value: String) => {
    val metrics = value.stripPrefix("[").stripSuffix("]").split(",")
    // split drops trailing empty strings, so inputs such as "," or "[,]" yield an empty
    // array; reducing that would throw instead of reporting the value as invalid.
    metrics.nonEmpty && metrics.forall(metric => evalMetricsValues.contains(metric.trim))
  }
}