def mapText()

in core/src/main/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessor.scala [51:80]


  /** Rewrites `inputText` by replacing every key found in this trie with its stored value.
    *
    * The text is scanned left to right. At each position the trie is walked one
    * character at a time through `get`; the deepest node reached that carries a
    * non-empty `value` wins (longest match). When a match is found, its value is
    * appended to the output and any letters, digits or underscores that directly
    * follow the matched key are dropped, so the rest of the word is consumed along
    * with the key. When nothing matches, the current character is copied unchanged
    * and scanning resumes at the next character.
    *
    * The value stored on the root node itself (the empty key) is never consulted.
    *
    * @param inputText the text to rewrite
    * @return the rewritten text
    */
  def mapText(inputText: String): String = {
    val length = inputText.length
    val outputText = new StringBuilder(length)

    // Characters treated as part of a word: letters, digits and underscore.
    def isAlpha(char: Char): Boolean = char.isLetterOrDigit || char == '_'

    // Index of the first non-word character at or after `from`, or `length` if none.
    @scala.annotation.tailrec
    def skipAlphas(from: Int): Int =
      if (from < length && isAlpha(inputText.charAt(from))) skipAlphas(from + 1) else from

    // Walks the trie starting from `node`, which has already consumed the input up to
    // (excluding) `cursor`. Returns the replacement of the longest key seen together
    // with the index just past that key, or `best` when no deeper key matches.
    @scala.annotation.tailrec
    def findMatch(node: Option[Trie],
                  cursor: Int,
                  best: Option[(Seq[Char], Int)]): Option[(Seq[Char], Int)] = node match {
      case None => best
      case Some(current) =>
        // The node's value is inspected before testing for end of input, so a key that
        // finishes exactly on the last character is still recognised. An earlier match
        // is kept while the walk passes through nodes that carry no value.
        val updated =
          if (current.value.isEmpty) {
            best
          } else {
            val replacement: Seq[Char] = current.value
            Some((replacement, cursor))
          }
        val next = if (cursor < length) current.get(inputText.charAt(cursor)) else None
        findMatch(next, cursor + 1, updated)
    }

    // Index-based and self-tail-recursive, so stack depth does not grow with the
    // length of the input and no intermediate character sequences are copied.
    @scala.annotation.tailrec
    def scan(start: Int): Unit =
      if (start < length) {
        findMatch(this.get(inputText.charAt(start)), start + 1, None) match {
          case Some((replacement, end)) =>
            outputText ++= replacement
            scan(skipAlphas(end))
          case None =>
            outputText += inputText.charAt(start)
            scan(start + 1)
        }
      }

    scan(0)
    outputText.toString
  }