in core/src/main/scala/com/microsoft/azure/synapse/ml/stages/TextPreprocessor.scala [51:80]
/** Returns a copy of `inputText` in which every key found in this trie is replaced by its value.
  *
  * The text is read left to right. At each position the trie is descended one character at a
  * time through `get`, and the deepest node reached whose `value` is non-empty wins (longest
  * match); a node with an empty `value` is treated as not ending a key. When a key matches, its
  * value is written and the letters, digits and underscores that directly follow the matched
  * span are dropped. When nothing matches, the current character is copied unchanged and
  * scanning resumes one character later.
  *
  * The walk is index-based and every helper is self tail-recursive, so stack depth does not
  * grow with the length of the input.
  *
  * @param inputText text to rewrite; must not be null
  * @return the rewritten text
  */
def mapText(inputText: String): String = {
  val text = inputText.toCharArray
  val textEnd = text.length
  val outputText = new StringBuilder(textEnd)

  def isAlpha(char: Char): Boolean = char.isLetterOrDigit || char == '_'

  // Index of the first character at or after `from` that is not a letter, digit or underscore.
  @scala.annotation.tailrec
  def skipAlphas(from: Int): Int =
    if (from < textEnd && isAlpha(text(from))) skipAlphas(from + 1) else from

  // Descends the trie from `node`, which has already consumed the text up to (excluding) `cursor`.
  // `best` is the deepest replacement seen so far, paired with the index just past its key.
  @scala.annotation.tailrec
  def longestMatch(node: Option[Trie],
                   cursor: Int,
                   best: Option[(Seq[Char], Int)]): Option[(Seq[Char], Int)] =
    node match {
      case None => best
      case Some(current) =>
        val replacement: Seq[Char] = current.value
        // Record this node before testing for end of input, so that a key ending on the very
        // last character still matches. An empty value keeps the earlier, shorter match intact.
        val updated: Option[(Seq[Char], Int)] =
          if (replacement.isEmpty) best else Some((replacement, cursor))
        if (cursor >= textEnd) updated
        else longestMatch(current.get(text(cursor)), cursor + 1, updated)
    }

  // Emits output for the text starting at `start` and continues until the input is exhausted.
  @scala.annotation.tailrec
  def scan(start: Int): Unit =
    if (start < textEnd) {
      longestMatch(this.get(text(start)), start + 1, None) match {
        case Some((replacement, matchEnd)) =>
          outputText ++= replacement
          // Any match, including a fallback to a shorter key, swallows the rest of the word.
          scan(skipAlphas(matchEnd))
        case None =>
          outputText += text(start)
          scan(start + 1)
      }
    }

  scan(0)
  outputText.toString
}