private void processLeaf()

in opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java [246:317]


  /**
   * Consumes one leaf: appends its tokens to {@code sentence} and, when the
   * leaf's secondary tag yields a named-entity type, records the matching
   * {@link Span} in {@code names}.
   * <p>
   * The {@code leftContractionPart} field carries state between calls: a leaf
   * whose secondary tag contains {@code <sam->} parks the last piece of its
   * lexeme there, and the following call tries to merge it with that leaf's
   * lexeme via {@code PortugueseContractionUtility.toContraction}.
   *
   * @param leaf the leaf whose lexeme and secondary tag are read
   * @param sentence the tokens collected so far; new tokens are appended to it
   * @param names the spans collected so far; a span may be appended, and the
   *     last span may be replaced by one that ends at the current sentence end
   */
  private void processLeaf(Leaf leaf, List<String> sentence, List<Span> names) {

    // Becomes true once this leaf's text has reached the sentence as part of
    // a contraction, so it must not be appended a second time further down.
    boolean consumedByContraction = false;

    if (leftContractionPart != null) {
      // A previous leaf left the left half of a contraction pending; try to
      // merge it with this leaf's lexeme.
      String rightPart = leaf.getLexeme();
      String contraction =
          PortugueseContractionUtility.toContraction(leftContractionPart, rightPart);
      if (contraction == null) {
        // toContraction gave nothing back: emit the pending left part alone
        // and let this leaf go through the regular path below.
        sentence.add(leftContractionPart);
      } else {
        for (String piece : WHITESPACE_PATTERN.split(contraction)) {
          sentence.add(piece);
        }
        consumedByContraction = true;
      }
      leftContractionPart = null;
    }

    String secondaryTag = leaf.getSecondaryTag();
    String entityType = null;
    // Set when the tag contains <NER2>: this leaf may continue the last span.
    boolean mayExtendPreviousEntity = false;

    if (secondaryTag != null) {
      if (!consumedByContraction && secondaryTag.contains("<sam->")) {
        // This leaf opens a contraction: emit every piece produced by
        // UNDERLINE_PATTERN except the last, park the last one for the next
        // call, and stop here.
        // NOTE(review): if the split yields no pieces at all, the index
        // access below throws - confirm lexemes can never be split to nothing.
        String[] pieces = UNDERLINE_PATTERN.split(leaf.getLexeme());
        int lastPiece = pieces.length - 1;
        for (int i = 0; i < lastPiece; i++) {
          sentence.add(pieces[i]);
        }
        leftContractionPart = pieces[lastPiece];
        return;
      }
      mayExtendPreviousEntity = secondaryTag.contains("<NER2>");
      entityType = getNER(secondaryTag);
    }

    // The entity starts at the current end of the sentence, captured before
    // this leaf's own tokens are appended.
    int entityStart = (entityType == null) ? -1 : sentence.size();

    if (!consumedByContraction) {
      sentence.addAll(processLexeme(leaf.getLexeme()));
    }

    if (entityType != null) {
      // NOTE(review): when the leaf was consumed by a contraction above, no
      // token is appended between the two size() reads, so the recorded span
      // has start == end - confirm this is intended.
      names.add(new Span(entityStart, sentence.size(), entityType));
    }

    if (mayExtendPreviousEntity && !names.isEmpty()) {
      // Stretch the most recent span over the newest token, but only if that
      // span ended exactly one token before the current end of the sentence.
      int lastIdx = names.size() - 1;
      Span previous = names.get(lastIdx);
      if (sentence.size() - 1 == previous.getEnd()) {
        names.set(lastIdx,
            new Span(previous.getStart(), sentence.size(), previous.getType()));
      }
    }

  }