in core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/DataConversion.scala [26:104]
/** Auxiliary constructor: delegates to the primary constructor with a uid obtained from
  * `Identifiable.randomUID("DataConversion")`.
  */
def this() = {
  this(Identifiable.randomUID("DataConversion"))
}
/** Names of the columns whose type will be converted, held as an array of strings.
  * @group param
  */
val cols: StringArrayParam = {
  val description = "Comma separated list of columns whose type will be converted"
  new StringArrayParam(this, "cols", description)
}
/** Returns the value of the `cols` param.
  * @group getParam
  */
final def getCols: Array[String] = getOrDefault(cols)
/** Sets the `cols` param and returns this instance so calls can be chained.
  * @group setParam
  */
def setCols(value: Array[String]): this.type = set(cols -> value)
/** The result type.
  *
  * The values matched by `transform` are: `boolean`, `byte`, `short`, `integer`, `long`,
  * `float`, `double`, `string`, `toCategorical`, `clearCategorical` and `date`.
  * The default is the empty string, which is not one of those values.
  * @group param
  */
val convertTo: Param[String] = {
  val description = "The result type"
  new Param[String](this, "convertTo", description)
}
setDefault(convertTo, "")
/** Returns the value of the `convertTo` param (the explicitly set value, otherwise its default).
  * @group getParam
  */
final def getConvertTo: String = getOrDefault(convertTo)
/** Sets the `convertTo` param and returns this instance so calls can be chained.
  * @group setParam
  */
def setConvertTo(value: String): this.type = set(convertTo -> value)
/** Format used for DateTime when making DateTime:String conversions.
  * Defaults to `yyyy-MM-dd HH:mm:ss`.
  * @group param
  */
val dateTimeFormat: Param[String] = {
  val description = "Format for DateTime when making DateTime:String conversions"
  new Param[String](this, "dateTimeFormat", description)
}
setDefault(dateTimeFormat, "yyyy-MM-dd HH:mm:ss")
/** Returns the value of the `dateTimeFormat` param (the explicitly set value, otherwise its
  * default).
  * @group getParam
  */
final def getDateTimeFormat: String = getOrDefault(dateTimeFormat)
/** Sets the `dateTimeFormat` param and returns this instance so calls can be chained.
  * @group setParam
  */
def setDateTimeFormat(value: String): this.type = set(dateTimeFormat -> value)
/** Apply the <code>DataConversion</code> transform to the dataset.
  *
  * Each name in `cols` is trimmed and the matching column is converted, in order, according to
  * `convertTo`. The conversion is applied in place: the converted column keeps its name.
  *
  * @param dataset The dataset to be transformed
  * @return The transformed dataset
  * @throws IllegalArgumentException if `dataset` is null or has no columns, or if `convertTo`
  *                                  is not one of the supported conversions while at least one
  *                                  column is requested
  * @throws NoSuchElementException if `verifyCols` reports problems for the requested columns
  */
override def transform(dataset: Dataset[_]): DataFrame = {
  logTransform[DataFrame]({
    require(dataset != null, "No dataset supplied")
    require(dataset.columns.length != 0, "Dataset with no columns cannot be converted")
    val inputDF = dataset.toDF()
    val colsList = $(cols).map(_.trim)
    val errorList = verifyCols(inputDF, colsList)
    if (errorList.nonEmpty) {
      // Surface what verifyCols reported instead of throwing a message-less exception.
      throw new NoSuchElementException(
        s"DataConversion column verification failed: ${errorList.mkString(", ")}")
    }
    val conversion = $(convertTo)
    // Thread the DataFrame through one conversion per requested column.
    colsList.foldLeft(inputDF) { (df, convCol) =>
      conversion match {
        case "boolean" => numericTransform(df, BooleanType, convCol)
        case "byte" => numericTransform(df, ByteType, convCol)
        case "short" => numericTransform(df, ShortType, convCol)
        case "integer" => numericTransform(df, IntegerType, convCol)
        case "long" => numericTransform(df, LongType, convCol)
        case "float" => numericTransform(df, FloatType, convCol)
        case "double" => numericTransform(df, DoubleType, convCol)
        case "string" => numericTransform(df, StringType, convCol)
        case "toCategorical" =>
          // The indexer is fitted on the current DataFrame and overwrites the column in place.
          new ValueIndexer().setInputCol(convCol).setOutputCol(convCol).fit(df).transform(df)
        case "clearCategorical" =>
          new IndexToValue().setInputCol(convCol).setOutputCol(convCol).transform(df)
        case "date" => toDateConversion(df, convCol)
        case other =>
          // Previously an unknown value (including the default "") escaped as a bare MatchError.
          throw new IllegalArgumentException(
            s"Unsupported convertTo value '$other'. Supported values: boolean, byte, short, " +
              "integer, long, float, double, string, toCategorical, clearCategorical, date")
      }
    }
  })
}