def matches()

in nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCEnStopWordsTokenEnricher.scala [262:359]
61 lines of code
20 McCabe index (conditional complexity)

        /**
          * Checks the given string against this holder's conditions for the given POS.
          *
          * A string containing a space never matches. Otherwise the POS is mandatory, and the
          * string matches when it is not excluded for that POS and is included either by the
          * POS itself, by the POS-independent conditions, or by the conditions registered for that POS.
          *
          * @param s String to check.
          * @param posOpt Optional POS; must be defined whenever `s` contains no space.
          * @return `true` if the string matches.
          */
        def matches(s: String, posOpt: Option[String]): Boolean =
            if s.contains(' ') then false
            else
                // A space-free string must always come with its POS.
                val pos = posOpt.getOrElse(throw new AssertionError("Unexpected POS."))

                // Excluded either by the POS as a whole or by the exclusions registered for it.
                def excluded: Boolean =
                    exclPoses.contains(pos) || matches(s, excludes.getOrElse(pos, Set.empty))

                // Included by the POS as a whole, by the POS-independent set or by the POS-specific set.
                def included: Boolean =
                    inclPoses.contains(pos) ||
                    matches(s, any) ||
                    matches(s, includes.getOrElse(pos, Set.empty))

                !excluded && included

    /**
      * Stopword data holder.
      *
      * Bundles the hash-based holders (stems, lemmas, origins) together with the scan-based
      * wildcard holders (lemmas, origins) so that a token sequence can be tested against all of them.
      *
      * @param stems Stems data holder.
      * @param lemmas Lemmas data holder.
      * @param origins Origins data holder.
      * @param wildcardsLemmas Wildcards lemmas data holder.
      * @param wildcardsOrigins Wildcards origins data holder.
      */
    private case class StopWordHolder(
        stems: HashHolder,
        lemmas: HashHolder,
        origins: HashHolder,
        wildcardsLemmas: ScanHolder,
        wildcardsOrigins: ScanHolder
    ):
        /**
          * Tests the given tokens against every holder, hash-based ones first.
          *
          * @param toks Tokens to check. Must not be empty.
          * @return `true` if any of the holders matches the tokens.
          */
        def matches(toks: Seq[NCToken]): Boolean =
            // POS is taken into account for a single token only.
            val posOpt =
                if toks.isEmpty then
                    throw new AssertionError(s"Unexpected empty tokens.")
                else if toks.size == 1 then
                    getPos(toks.head).?
                else
                    None

            // Hash access.
            def hashHit: Boolean =
                stems.matches(toStemKey(toks), posOpt) ||
                lemmas.matches(toLemmaKey(toks), posOpt) ||
                origins.matches(toOriginalKey(toks), posOpt)

            // Scan access.
            def scanHit: Boolean =
                wildcardsLemmas.matches(toLemmaKey(toks), posOpt) ||
                wildcardsOrigins.matches(toOriginalKey(toks), posOpt)

            hashHit || scanHit

    /**
      * Initializes the stopword-related state of this enricher.
      *
      * Maps the additional and excluded word sets through `getStem`, validates the results,
      * maps `PERCENTS` through `getStem`, loads the stopword definitions from the
      * `stopwords/en_stop_words.txt` resource and asks `NCEnStopWordGenerator` for the
      * first words and noun words.
      *
      * Raises the error produced by `E` when a processed additional or excluded word contains
      * a whitespace character, or when the same stem is present in both sets.
      */
    private def init(): Unit =
        addStems = addSet.map(getStem)
        exclStems = exclSet.map(getStem)

        // Rejects any set containing a string with a whitespace character.
        def check(name: String, set: Set[String]): Unit =
            if set.exists(_.exists(_.isWhitespace)) then throw E(s"$name contain a string with whitespaces.")

        check("Additional synonyms", addStems)
        check("Excluded synonyms", exclStems)

        val dups = addStems.intersect(exclStems)
        // Raised with an explicit `throw`, the same way `check` does, so the duplicate
        // detection cannot be silently dropped as an unused expression.
        if dups.nonEmpty then
            throw E(s"Duplicate stems detected between additional and excluded stopwords [dups=${dups.mkString(",")}]")

        percents = PERCENTS.map(getStem)

        // Case sensitive.
        val m = readStopWords(U.readLines(res = "stopwords/en_stop_words.txt", filterText = true, log = logger))

        // The holder stored under `false` is the stopwords one, the holder under `true` is the exceptions one.
        stopWords = m(false)
        exceptions = m(true)

        val gen = new NCEnStopWordGenerator(stemmer)

        firstWords = gen.mkFirstWords()
        nounWords = gen.mkNounWords()

    /**
      * Parses configuration template.
      *
      * @param lines Configuration file content.
      * @return Stopword holders keyed by the is-exception flag.
      */
    private def readStopWords(lines: Iterator[String]): Map[Boolean, StopWordHolder] =
        // 1. Prepares accumulation data structure.
        enum WordForm:
            case STEM, LEM, ORIG

        import WordForm.*

        class Condition[T]:
            val any = mutable.HashSet.empty[T]
            val incls = mutable.HashMap.empty[String, mutable.HashSet[T]]
            val excls = mutable.HashMap.empty[String, mutable.HashSet[T]]

            def addCondition(cond: T, poses: Map[String, Boolean]): Any =
                if poses.isEmpty then
                    any += cond
                else
                    def add(m: mutable.HashMap[String, mutable.HashSet[T]], incl: Boolean): Unit =
                        poses.filter { (_, isIncl) => isIncl == incl }.keys.foreach(pos =>