def this()

in vw/src/main/scala/com/microsoft/azure/synapse/ml/vw/VowpalWabbitFeaturizer.scala [29:88]


  def this() = this(Identifiable.randomUID("VowpalWabbitFeaturizer"))

  setDefault(inputCols -> Array())
  setDefault(outputCol -> "features")

  val seed = new IntParam(this, "seed", "Hash seed")
  setDefault(seed -> 0)

  def getSeed: Int = $(seed)
  def setSeed(value: Int): this.type = set(seed, value)

  val stringSplitInputCols = new StringArrayParam(this, "stringSplitInputCols",
    "Input cols that should be split at word boundaries")
  setDefault(stringSplitInputCols -> Array())

  def getStringSplitInputCols: Array[String] = $(stringSplitInputCols)
  def setStringSplitInputCols(value: Array[String]): this.type = set(stringSplitInputCols, value)

  val preserveOrderNumBits = new IntParam(this, "preserveOrderNumBits",
    "Number of bits used to preserve the feature order. This will reduce the hash size. " +
      "Needs to be large enough to fit count the maximum number of words",
    (value: Int) => value >= 0 && value < 29)
  setDefault(preserveOrderNumBits -> 0)

  def getPreserveOrderNumBits: Int = $(preserveOrderNumBits)
  def setPreserveOrderNumBits(value: Int): this.type = set(preserveOrderNumBits, value)

  val prefixStringsWithColumnName = new BooleanParam(this, "prefixStringsWithColumnName",
    "Prefix string features with column name")
  setDefault(prefixStringsWithColumnName -> true)

  def getPrefixStringsWithColumnName: Boolean = $(prefixStringsWithColumnName)
  def setPrefixStringsWithColumnName(value: Boolean): this.type = set(prefixStringsWithColumnName, value)

  private def getAllInputCols = getInputCols ++ getStringSplitInputCols

  private def getFeaturizer(name: String,
                            dataType: DataType,
                            nullable: Boolean,
                            idx: Int,
                            namespaceHash: Int): Featurizer = {

    val prefixName = if (getPrefixStringsWithColumnName) name else ""
    dataType match {
      case DoubleType => getNumericFeaturizer[Double](prefixName, nullable, idx, namespaceHash, 0)
      case FloatType => getNumericFeaturizer[Float](prefixName, nullable, idx, namespaceHash, 0)
      case IntegerType => getNumericFeaturizer[Int](prefixName, nullable, idx, namespaceHash, 0)
      case LongType => getNumericFeaturizer[Long](prefixName, nullable, idx, namespaceHash, 0)
      case ShortType => getNumericFeaturizer[Short](prefixName, nullable, idx, namespaceHash, 0)
      case ByteType => getNumericFeaturizer[Byte](prefixName, nullable, idx, namespaceHash, 0)
      case BooleanType => new BooleanFeaturizer(idx, prefixName, namespaceHash, getMask)
      case StringType => getStringFeaturizer(name, prefixName, idx, namespaceHash)
      case ArrayType(t: StringType, _) => getArrayFeaturizer("", ArrayType(t), nullable, idx, namespaceHash)
      // Arrays of strings never use a prefix and use the column name namespace hash
      case arr: ArrayType => getArrayFeaturizer(name, arr, nullable, idx)
      case struct: StructType => getStructFeaturizer(struct, name, nullable, idx)
      case m: MapType => getMapFeaturizer(prefixName, m, idx, namespaceHash)
      case m: Any => getOtherFeaturizer(m, prefixName, idx)
    }
  }