@tailrec private def combine()

in nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/parsers/NCSemanticEntityParser.scala [116:257]


    /**
      * Builds every list that can be formed by picking, for each position, either the element of `data1`
      * or the element of `data2` at that position.
      *
      * @param data1 First sequence of candidates.
      * @param data2 Second sequence of candidates. Must have the same size as `data1`.
      * @param i Index of the position to process next.
      * @param tmp Partial combinations accumulated for the positions preceding `i`.
      * @return Accumulated combinations once all positions are processed, or empty set if the input sequences are empty.
      */
    @tailrec private def combine(data1: Seq[String], data2: Seq[String], i: Int = 0, tmp: Set[List[String]] = Set(List.empty)): Set[List[String]] =
        require(data1.sizeIs == data2.size)

        val len = data1.size

        if len == 0 then Set.empty
        else if i < len then
            // Appends the element at the current position of the given source to each partial combination.
            def extend(src: Seq[String]): Set[List[String]] = tmp.map(_ :+ src(i))

            combine(data1, data2, i + 1, extend(data1) ++ extend(data2))
        else tmp

import NCSemanticEntityParser.*

/**
  * Semantic entity parser implementation.
  *
  * This synonym-based parser provides a simple yet powerful way to find domain-specific data in the input text.
  * It is configured via a list of [[NCSemanticElement]] instances which represents all possible
  * [[NCEntity named entities]] that this parser can detect.
  *
  * [[NCSemanticElement Semantic elements]] can be configured via YAML or JSON files in a special format or
  * passed to this parser as a programmatically prepared list. [[NCSemanticElement Semantic elements]] contain a set of
  * synonyms which can use special [[https://nlpcraft.apache.org/built-in-entity-parser.html#macros macros]].
  * These macros can also be provided via YAML and JSON files or passed directly in the case of a programmatically
  * prepared [[NCSemanticElement]] list.
  *
  * Example of YAML elements definition.
  * <pre>
  * macros:
  *   "&lt;OF&gt;": "{of|for|per}"
  *   "&lt;CUR&gt;": "{current|present|now|local}"
  *   "&lt;TIME&gt;": "{time &lt;OF&gt; day|day time|date|time|moment|datetime|hour|o'clock|clock|date time}"
  * elements:
  *   - id: "x:time"
  *     description: "Date and/or time token indicator."
  *     synonyms:
  *       - "{&lt;CUR&gt;|_} &lt;TIME&gt;"
  *       - "what &lt;TIME&gt; {is it now|now|is it|_}"
  * </pre>
  * Given this simple definition the **x:time** element can be detected by a large number of synonyms like *day time*,
  * *local day time*, *time of day*, *local time of day*, *what hour is it*, etc.
  *
  * @param stemmer [[NCStemmer]] implementation which is used to match tokens and the given [[NCSemanticElement]] synonyms.
  * @param parser [[NCTokenParser]] implementation which will be used for [[NCSemanticElement]] synonyms tokenization.
  *     It should be the same implementation as used in [[NCPipeline.getTokenParser]].
  * @param macros Macros map which is used for extracting [[NCSemanticElement]] synonyms defined via **macros**.
  *    More information at [[https://nlpcraft.apache.org/built-in-entity-parser.html#macros]].
  * @param elements Programmatically prepared [[NCSemanticElement]] instances. Note that at least one of the model
  *    or the elements must be supplied.
  * @param mdlResOpt Optional relative path, absolute path, classpath resource or URL to YAML or JSON semantic model
  *    which contains [[NCSemanticElement]] definitions. Note that at least one of the model or the elements must be supplied.
  *
  * @see [[NCSemanticElement]]
  */
class NCSemanticEntityParser private (
    stemmer: NCStemmer,
    parser: NCTokenParser,
    macros: Map[String, String],
    elements: List[NCSemanticElement],
    mdlResOpt: Option[String]
) extends NCEntityParser with LazyLogging:
    require(stemmer != null, "Stemmer cannot be null.")
    require(parser != null, "Token parser cannot be null.")
    require(macros != null, "Macros cannot be null.")
    require(elements != null && elements.nonEmpty || mdlResOpt.isDefined, "Either elements or external YAML/JSON model must be supplied.")

    /**
      * Creates [[NCSemanticEntityParser]] instance with given parameters.
      * No external model resource is used by this constructor.
      *
      * @param stemmer [[NCStemmer]] implementation for synonyms language. Cannot be null.
      * @param parser [[NCTokenParser]] implementation. Cannot be null.
      * @param macros Macros map. Cannot be null. Use the constructor without this parameter if no macros are needed.
      * @param elements [[NCSemanticElement]] list. Cannot be null or empty.
      */
    def this(stemmer: NCStemmer, parser: NCTokenParser, macros: Map[String, String], elements: List[NCSemanticElement]) =
        this(stemmer, parser, macros, elements, None)

    /**
      * Creates [[NCSemanticEntityParser]] instance with given parameters.
      * An empty macros map is used and no external model resource is used by this constructor.
      *
      * @param stemmer [[NCStemmer]] implementation for synonyms language. Cannot be null.
      * @param parser [[NCTokenParser]] implementation. Cannot be null.
      * @param elements [[NCSemanticElement]] list. Cannot be null or empty.
      */
    def this(stemmer: NCStemmer, parser: NCTokenParser, elements: List[NCSemanticElement]) =
        this(stemmer, parser, Map.empty, elements, None)

    /**
      * Creates [[NCSemanticEntityParser]] instance with given parameters.
      * Empty macros and elements are passed to the primary constructor; when the model resource is defined
      * both are read from it during initialization.
      *
      * @param stemmer [[NCStemmer]] implementation for synonyms language. Cannot be null.
      * @param parser [[NCTokenParser]] implementation. Cannot be null.
      * @param mdlRes Relative path, absolute path, classpath resource or URL to YAML or JSON semantic model definition.
      */
    def this(stemmer: NCStemmer, parser: NCTokenParser, mdlRes: String) =
        // NOTE(review): `.?` is presumably a project extension wrapping the value into `Option` - confirm.
        this(stemmer, parser, Map.empty, List.empty, mdlRes.?)

    /**
      * Source type of the external model resource, detected lazily on first access via `NCSemanticSourceType.detect`.
      * Must only be accessed when `mdlResOpt` is defined, otherwise the `require` check fails.
      */
    private lazy val scrType =
        require(mdlResOpt.nonEmpty)

        val mdlRes = mdlResOpt.get

        NCSemanticSourceType.detect(mdlRes)

    // Prepared synonyms holder. Assigned in `init()`.
    private var synsHolder: NCSemanticSynonymsHolder = _
    // Semantic elements keyed by the value of their `getType`. Assigned in `init()`.
    private var elemsMap: Map[String, NCSemanticElement] = _

    // Populates the fields above as part of instance construction.
    init()

    /**
      * Prepares the synonyms holder and the elements lookup map.
      *
      * If `mdlResOpt` is defined, macros and elements are read from that resource via `NCSemanticSourceReader`,
      * otherwise the macros and elements passed to the constructor are used. The results are stored
      * in `synsHolder` and `elemsMap`.
      */
    private def init(): Unit =
        // Indexes elements by the value of their `getType`. For duplicated types the last element wins (`toMap` semantics).
        def toMap(elems: Seq[NCSemanticElement]): Map[String, NCSemanticElement] = elems.map(p => p.getType -> p).toMap

        // Distinct local names are used to avoid shadowing the constructor parameters.
        val (mdlMacros, mdlElems) =
            mdlResOpt match
                case Some(mdlSrc) =>
                    val src = NCSemanticSourceReader.read(new BufferedInputStream(NCUtils.getStream(mdlSrc)), scrType)
                    // Log the resource itself rather than its `Option` wrapper (`Some(...)`).
                    logger.trace(s"Loaded resource: $mdlSrc")
                    (src.macros, src.elements)
                case None => (macros, elements)

        // Built before synonyms preparation to keep the original evaluation order.
        val type2Elem = toMap(mdlElems)

        synsHolder = NCSemanticSynonymsProcessor.prepare(stemmer, parser, mdlMacros, mdlElems)
        elemsMap = type2Elem

    /**
      * Logs a warning that the given token property was not found.
      *
      * @param name Name of the missing token property.
      */
    private def warnMissedProperty(name: String): Unit = logger.warn(s"'$name' property not found. Is proper token enricher configured?")

    /** @inheritdoc */
    override def parse(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): List[NCEntity] =
        if toks.exists(_.get[String]("stopword").isEmpty) then warnMissedProperty("stopword")

        val stems = toks.map(p => p -> stemmer.stem(p.getText.toLowerCase)).toMap
        val stems4Lemms =
            var ok = true
            val seq =
                for (t <- toks; lemmaOpt = t.get[String]("lemma") if ok)
                    yield
                        ok = lemmaOpt.isDefined
                        t -> lemmaOpt.orNull

            if ok then
                seq.toMap.map { (tok, lemma) => tok -> stemmer.stem(lemma.toLowerCase) }