def this()

in core/src/main/scala/com/microsoft/azure/synapse/ml/featurize/DataConversion.scala [26:104]


  /** Auxiliary no-argument constructor: delegates to the primary constructor with a
    * uid obtained from <code>Identifiable.randomUID("DataConversion")</code>.
    */
  def this() = {
    this(Identifiable.randomUID("DataConversion"))
  }

  /** Names of the columns whose type will be converted.
    * The param is a <code>StringArrayParam</code>, so the value is supplied as an array of
    * column names (one name per element).
    * @group param
    */
  val cols: StringArrayParam =
    new StringArrayParam(
      this,
      "cols",
      "Comma separated list of columns whose type will be converted")

  /** Gets the value of the <code>cols</code> param: the names of the columns to convert.
    * @group getParam
    */
  final def getCols: Array[String] = {
    $(cols)
  }

  /** Sets the <code>cols</code> param.
    * @param value names of the columns to convert
    * @return this instance, so that setter calls can be chained
    * @group setParam
    */
  def setCols(value: Array[String]): this.type = {
    set(cols, value)
  }

  /** The result type of the conversion. The default is the empty string.
    * The values handled by <code>transform</code> are: boolean, byte, short, integer, long,
    * float, double, string, toCategorical, clearCategorical and date.
    * @group param
    */
  val convertTo: Param[String] =
    new Param[String](
      this,
      "convertTo",
      "The result type")
  setDefault(convertTo, "")

  /** Gets the value of the <code>convertTo</code> param: the requested result type.
    * @group getParam
    */
  final def getConvertTo: String = {
    $(convertTo)
  }

  /** Sets the <code>convertTo</code> param.
    * @param value the requested result type
    * @return this instance, so that setter calls can be chained
    * @group setParam
    */
  def setConvertTo(value: String): this.type = {
    set(convertTo, value)
  }

  /** Format used for DateTime when making DateTime:String conversions.
    * Defaults to <code>yyyy-MM-dd HH:mm:ss</code>.
    * @group param
    */
  val dateTimeFormat: Param[String] =
    new Param[String](
      this,
      "dateTimeFormat",
      "Format for DateTime when making DateTime:String conversions")
  setDefault(dateTimeFormat, "yyyy-MM-dd HH:mm:ss")

  /** Gets the value of the <code>dateTimeFormat</code> param.
    * @group getParam
    */
  final def getDateTimeFormat: String = {
    $(dateTimeFormat)
  }

  /** Sets the <code>dateTimeFormat</code> param.
    * @param value the DateTime format to use for DateTime:String conversions
    * @return this instance, so that setter calls can be chained
    * @group setParam
    */
  def setDateTimeFormat(value: String): this.type = {
    set(dateTimeFormat, value)
  }

  /** Apply the <code>DataConversion</code> transform to the dataset.
    *
    * Each entry of <code>cols</code> is trimmed and then converted, one column after another,
    * according to <code>convertTo</code>. Supported values of <code>convertTo</code> are:
    * boolean, byte, short, integer, long, float, double, string, toCategorical,
    * clearCategorical and date.
    *
    * @param dataset The dataset to be transformed
    * @return The transformed dataset
    * @throws IllegalArgumentException if the dataset is null or has no columns, or if
    *         <code>convertTo</code> is not a supported value while at least one column is
    *         requested for conversion
    * @throws NoSuchElementException if <code>verifyCols</code> reports problems for the
    *         requested columns
    */
  override def transform(dataset: Dataset[_]): DataFrame = {
    logTransform[DataFrame]({
      require(dataset != null, "No dataset supplied")
      require(dataset.columns.length != 0, "Dataset with no columns cannot be converted")
      val colsList = $(cols).map(_.trim)
      // Convert to a DataFrame once and reuse it for both verification and conversion.
      val inputDF = dataset.toDF()
      val errorList = verifyCols(inputDF, colsList)
      if (errorList.nonEmpty) {
        // Surface what verification reported instead of throwing a message-less exception.
        throw new NoSuchElementException(
          s"DataConversion column verification failed: ${errorList.mkString(", ")}")
      }
      val targetType = $(convertTo)

      // Thread the DataFrame through one conversion per requested column.
      colsList.foldLeft(inputDF) { (df, convCol) =>
        targetType match {
          case "boolean" => numericTransform(df, BooleanType, convCol)
          case "byte" => numericTransform(df, ByteType, convCol)
          case "short" => numericTransform(df, ShortType, convCol)
          case "integer" => numericTransform(df, IntegerType, convCol)
          case "long" => numericTransform(df, LongType, convCol)
          case "float" => numericTransform(df, FloatType, convCol)
          case "double" => numericTransform(df, DoubleType, convCol)
          case "string" => numericTransform(df, StringType, convCol)
          case "toCategorical" =>
            val model = new ValueIndexer().setInputCol(convCol).setOutputCol(convCol).fit(df)
            model.transform(df)
          case "clearCategorical" =>
            new IndexToValue().setInputCol(convCol).setOutputCol(convCol).transform(df)
          case "date" => toDateConversion(df, convCol)
          // Without this case an unset (default "") or misspelled convertTo would fail with
          // an opaque scala.MatchError; report the offending value instead.
          case unsupported =>
            throw new IllegalArgumentException(
              s"Unsupported convertTo value '$unsupported'. Supported values are: boolean, " +
                "byte, short, integer, long, float, double, string, toCategorical, " +
                "clearCategorical, date")
        }
      }
    })
  }