in nlpcraft-stanford/src/main/scala/org/apache/nlpcraft/nlp/stanford/NCStanfordNLPEntityParser.scala [45:76]
override def parse(req: NCRequest, cfg: NCModelConfig, toks: List[NCToken]): List[NCEntity] =
    // Run the configured Stanford CoreNLP pipeline over the raw request text.
    val doc = new CoreDocument(req.getText)
    stanford.annotate(doc)

    val res = scala.collection.mutable.ArrayBuffer.empty[NCEntity]
    for (e <- doc.entityMentions().asScala)
        val typ = e.entityType().toLowerCase
        if supportedLc.contains(typ) then
            // Map the mention's character offsets back onto the NLPCraft tokens that
            // start and end exactly at those offsets.
            val offsets = e.charOffsets()
            val t1 = toks.find(_.getStartCharIndex == offsets.first)
            lazy val t2 = toks.find(_.getEndCharIndex == offsets.second)

            if t1.nonEmpty && t2.nonEmpty then
                val props = mutable.ArrayBuffer.empty[(String, Any)]

                // Normalized value, if Stanford provides one for this mention.
                val nne = e.coreMap().get(classOf[NormalizedNamedEntityTagAnnotation])
                if nne != null then props += "nne" -> nne

                // The confidence map key is ignored because it can be a higher-level
                // category (e.g. `location` for type `country`); the confidence value
                // is kept only when it is unambiguous.
                val conf = e.entityTypeConfidences()
                if conf != null && conf.size() == 1 then props += "confidence" -> conf.asScala.head._2
                // Keep only the tokens fully covered by the mention's character span.
                val entToks = toks.filter(
                    t => t.getStartCharIndex >= t1.get.getStartCharIndex && t.getEndCharIndex <= t2.get.getEndCharIndex
                )

                if entToks.nonEmpty then
                    res +=
                        new NCPropertyMapAdapter with NCEntity:
                            props.foreach { (k, v) => put(s"stanford:$typ:$k", v) }
                            // Assumed NCEntity members (the excerpt is truncated here):
                            // covered tokens, originating request ID and `stanford:<type>` entity ID.
                            override val getTokens: List[NCToken] = entToks
                            override val getRequestId: String = req.getRequestId
                            override val getId: String = s"stanford:$typ"

    res.toList
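
A minimal usage sketch, not part of this file: it builds the Stanford CoreNLP pipeline that `stanford.annotate(doc)` relies on and instantiates the parser. The two-argument constructor `NCStanfordNLPEntityParser(stanford, Set(...))`, the chosen entity types and the `@main` wrapper are assumptions inferred from the `stanford` and `supportedLc` references above; check them against the actual class before use.

import java.util.Properties
import edu.stanford.nlp.pipeline.StanfordCoreNLP
import org.apache.nlpcraft.nlp.stanford.NCStanfordNLPEntityParser

@main def stanfordParserSetup(): Unit =
    // Standard CoreNLP configuration: the `ner` annotator (plus its prerequisites)
    // produces the entity mentions that `parse(...)` above iterates over.
    val props = new Properties()
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner")
    val stanford = new StanfordCoreNLP(props)

    // Assumed constructor: the CoreNLP pipeline plus the set of supported entity
    // types (lower-cased into `supportedLc` by the parser), e.g. cities and dates.
    val parser = new NCStanfordNLPEntityParser(stanford, Set("city", "date"))

    // The parser would then be registered as an entity parser in the model's pipeline.
    println(s"Parser ready: $parser")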