in nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/impl/NCSemanticSynonymsProcessor.scala [208:257]
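
/**
  * Prepares the NCSemanticSynonymsHolder for the given semantic elements: registers macros,
  * converts declared synonyms and element values into stemmed chunks, and warns when a
  * synonym is shared by more than one element type. (Descriptive comment added to this excerpt.)
  */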
def prepare(
    stemmer: NCStemmer,
    tokParser: NCTokenParser,
    macros: Map[String, String],
    elements: Seq[NCSemanticElement]
): NCSemanticSynonymsHolder =
    require(stemmer != null && tokParser != null)

    // Order is important.
    checkElements(elements)
    checkMacros(macros, elements)
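
    // Register the user-defined macros with the parser that is passed to convertSynonyms below.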
    val macroParser = new NCMacroParser

    if macros != null then
        for ((name, body) <- macros) macroParser.addMacro(name, body)
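
    // `root` is the stem-based key used to spot the same synonym declared by different elements.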
    case class Holder(synonym: NCSemanticSynonym, elementType: String):
        lazy val root: String = synonym.chunks.map(p => if p.isText then p.stem else p.text).mkString(" ")
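
    // Accumulates every synonym together with the type of the element that declared it.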
    val buf = mutable.ArrayBuffer.empty[Holder]

    for (e <- elements)
        val elemType = e.getType
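
        // `add` stores already converted synonyms; `addSpec` stores a single-chunk textual
        // synonym, optionally bound to an element value.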
        def add(syns: Seq[NCSemanticSynonym]): Unit = buf ++= syns.map(Holder(_, elemType))
        def addSpec(txt: String, value: String = null): Unit =
            buf += Holder(NCSemanticSynonym(Seq(NCSemanticSynonymChunk(TEXT, txt, stemmer.stem(txt.toLowerCase))), value), elemType)
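
        // The element type itself always acts as a synonym.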
        addSpec(elemType)
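
        // Explicitly declared synonyms, converted (via the macro parser) into synonym chunks.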
        if e.getSynonyms != null then
            add(convertSynonyms(stemmer, tokParser, macroParser, elemType, e.getSynonyms).map(NCSemanticSynonym(_)))
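
        // Element values: each value name becomes a synonym carrying that value, followed by
        // the value's own declared synonyms.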
        if e.getValues != null then
            for ((name, syns) <- e.getValues)
                addSpec(name, value = name)

                if syns != null then
                    add(
                        convertSynonyms(stemmer, tokParser, macroParser, elemType, syns).
                            map(chunks => NCSemanticSynonym(chunks, value = name))
                    )
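
    // Warn when the same stemmed synonym is declared by more than one element type.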
    buf.groupBy(_.root).values.foreach(hs => {
        val elemTypes = hs.map(_.elementType).toSet

        if elemTypes.size > 1 then
            for (s <- hs.map(_.synonym).distinct)
                logger.warn(s"Synonym appears in multiple elements [synonym='${s.chunks.mkString(" ")}', elements=${elemTypes.mkString("{", ",", "}")}]")
    })