public Map findNames()

in tagging-server/src/main/java/org/apache/opennlp/tagging_server/namefind/BratNameFinderResource.java [65:156]


  public Map<String, NameAnn> findNames(@QueryParam("model") String modelName, String text) {
    // Runs sentence detection, tokenization and name finding over the raw text and
    // returns one NameAnn per detected name, keyed by a running index ("0", "1", ...).
    //
    // modelName selects the RawTextNameFinderFactory service to use.
    // text      is the raw document; all offsets in the result are relative to it.
    //
    // A name that stretches across a line break is reported as several segments
    // (parallel entries in NameAnn.texts and NameAnn.offsets), one per line.

    ServiceReference nerService = ServiceUtil
        .getModelServiceReference(RawTextNameFinderFactory.class, modelName);

    try {

      RawTextNameFinderFactory nameFinderFactory = ServiceUtil.getService(
          nerService, RawTextNameFinderFactory.class);

      SentenceDetector sentDetect = nameFinderFactory.createSentenceDetector();
      Tokenizer tokenizer = nameFinderFactory.createTokenizer();
      TokenNameFinder[] nameFinders = nameFinderFactory.createNameFinders();

      Span[] sentenceSpans = sentDetect.sentPosDetect(text);

      Map<String, NameAnn> map = new HashMap<>();

      int indexCounter = 0;

      for (Span sentenceSpan : sentenceSpans) {
        // The tokenizer only sees the sentence, so the token spans it returns are
        // relative to the sentence start, not to the document.
        String sentenceText = sentenceSpan.getCoveredText(text).toString();
        Span[] tokenSpans = tokenizer.tokenizePos(sentenceText);

        // The token strings must therefore be cut out of the sentence text as well;
        // applying the sentence-relative spans to the whole document would yield the
        // wrong tokens for every sentence but the first.
        String[] tokens = Span.spansToStrings(tokenSpans, sentenceText);

        for (TokenNameFinder nameFinder : nameFinders) {
          Span[] names = nameFinder.find(tokens);

          for (Span name : names) {
            // Name spans are token indexes (end exclusive); translate them to document
            // character offsets by adding the sentence start back in.
            int beginOffset = tokenSpans[name.getStart()].getStart()
                    + sentenceSpan.getStart();
            int endOffset = tokenSpans[name.getEnd() - 1].getEnd()
                    + sentenceSpan.getStart();

            // Collect the index of the first character of every run of line break
            // characters inside the name, so that "\r\n" counts as one break.
            List<Integer> newLineIndexes = new ArrayList<>();

            boolean inNewLineSequence = false;
            for (int ci = beginOffset; ci < endOffset; ci++) {
              char c = text.charAt(ci);
              if (c == '\n' || c == '\r') {
                if (!inNewLineSequence) {
                  newLineIndexes.add(ci);
                }
                inNewLineSequence = true;
              } else {
                inNewLineSequence = false;
              }
            }

            List<String> textSegments = new ArrayList<>();
            List<int[]> spanSegments = new ArrayList<>();

            int segmentBegin = beginOffset;

            for (int newLineOffset : newLineIndexes) {
              // Line breaks separated only by other whitespace (e.g. "\n \n") produce
              // several break indexes; the later ones were already skipped when the
              // segment start was advanced, and cutting a segment up to them would
              // throw a StringIndexOutOfBoundsException.
              if (newLineOffset < segmentBegin) {
                continue;
              }

              // create segment from begin to offset
              textSegments.add(text.substring(segmentBegin, newLineOffset));
              spanSegments.add(new int[]{segmentBegin, newLineOffset});

              // NOTE(review): presumably returns the index of the next non-whitespace
              // character before endOffset, or -1 if there is none - confirm against
              // the helper.
              segmentBegin = findNextNonWhitespaceChar(text, newLineOffset + 1, endOffset);

              if (segmentBegin == -1) {
                break;
              }
            }

            // create left over segment
            if (segmentBegin != -1) {
              textSegments.add(text.substring(segmentBegin, endOffset));
              spanSegments.add(new int[]{segmentBegin, endOffset});
            }

            NameAnn ann = new NameAnn();
            ann.texts = textSegments.toArray(new String[0]);
            ann.offsets = spanSegments.toArray(new int[spanSegments.size()][]);
            ann.type = name.getType();

            map.put(Integer.toString(indexCounter++), ann);
          }
        }
      }

      return map;

    } finally {
      // Always hand the service back, even if detection fails part way through.
      ServiceUtil.releaseService(nerService);
    }
  }