override def enrich()

in nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCOpenNLPTokenEnricher.scala [76:106]


    /**
      * Enriches every given token with `pos` and `lemma` properties.
      *
      * POS tags come from `tagger` and lemmas from `lemmatizer`; when either one is `null`
      * the corresponding value falls back to an empty POS tag or to the token text itself.
      * Nouns whose lemma could not be resolved are looked up a second time with the `NNN` tag
      * (see the comment inside the body).
      *
      * The tagging and lemmatization run under `this.synchronized`
      * (presumably because the underlying OpenNLP components are not thread-safe - TODO confirm).
      *
      * @param req Request descriptor. Not used by this implementation.
      * @param cfg Model configuration. Not used by this implementation.
      * @param toks Tokens to enrich. Each one is updated in place via `put`.
      */
    override def enrich(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): Unit =
        // Marker returned instead of a lemma when it cannot be obtained.
        // It is the capital letter 'O', NOT the digit zero. The retry filter below used to compare
        // against "0", which never matched and therefore accepted failed retries as fixes.
        val unknownLemma = "O"
        val txts = toks.map(_.getText).toArray

        this.synchronized {
            val poses = if tagger != null then tagger.tag(txts) else txts.map(_ => "")
            val rawLemmas = if lemmatizer != null then lemmatizer.lemmatize(txts, poses) else txts

            require(toks.sizeIs == poses.length && toks.sizeIs == rawLemmas.length)

            // For some reasons lemmatizer (en-lemmatizer.dict) marks some words with non-existent POS 'NNN'
            // Valid POS list: https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
            // Indexes of nouns ('NN') for which the first pass produced no lemma.
            val suspIdxs = rawLemmas.zip(poses).zipWithIndex.flatMap {
                case ((lemma, pos), i) => Option.when(lemma == unknownLemma && pos == "NN")(i)
            }

            val lemmas =
                if suspIdxs.nonEmpty && lemmatizer != null then
                    // Second attempt for suspicious words only, this time with the 'NNN' tag.
                    // Only successfully resolved lemmas are kept, keyed by the original token index.
                    val fixes = lemmatizer.
                        lemmatize(suspIdxs.map(i => txts(i)), suspIdxs.map(_ => "NNN")).
                        zipWithIndex.
                        flatMap { (lemma, i) => Option.when(lemma != unknownLemma)(suspIdxs(i) -> lemma) }.toMap

                    rawLemmas.zipWithIndex.map { (lemma, idx) => fixes.getOrElse(idx, lemma) }
                else
                    rawLemmas

            toks.zip(poses).zip(lemmas).foreach { case ((t, pos), lemma) =>
                t.put("pos", pos)
                t.put("lemma", lemma)
                () // Otherwise - NPE.
            }
        }