public TreeElement getElement()

in opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java [223:322]


    /**
     * Parses one line of a sentence tree into a {@link TreeElement}.
     *
     * <p>The line is tried against {@code NODE_PATTERN}, {@code LEAF_PATTERN} and
     * {@code PUNCTUATION_PATTERN}, in that order. Lines that match none of them go
     * through a set of special cases; anything still unrecognized is logged and
     * returned as a level-1 leaf whose lexeme is the whole line.
     *
     * @param line the raw line to parse; must not be {@code null}
     * @return a {@code Node} or {@code Leaf} describing the line, or {@code null}
     *         when the line is one of the recognized junk cases and should be skipped
     */
    public TreeElement getElement(String line) {
      // Note: every level produced here is at least 1, because 0 is reserved for the root.
      // The level is the length of the pattern's first group plus one
      // (presumably the run of leading '=' markers -- the patterns are declared elsewhere).

      // try node
      Matcher nodeMatcher = NODE_PATTERN.matcher(line);
      if (nodeMatcher.matches()) {
        int level = nodeMatcher.group(1).length() + 1;
        String syntacticTag = nodeMatcher.group(2);
        Node node = new Node();
        node.setLevel(level);
        node.setSyntacticTag(syntacticTag);
        return node;
      }

      // try a fully annotated leaf
      Matcher leafMatcher = LEAF_PATTERN.matcher(line);
      if (leafMatcher.matches()) {
        int level = leafMatcher.group(1).length() + 1;
        String syntacticTag = leafMatcher.group(2);
        String funcTag = leafMatcher.group(3);
        String lemma = leafMatcher.group(4);
        String secondaryTag = leafMatcher.group(5);
        String morphologicalTag = leafMatcher.group(6);
        String lexeme = leafMatcher.group(7);
        Leaf leaf = new Leaf();
        leaf.setLevel(level);
        leaf.setSyntacticTag(syntacticTag);
        leaf.setFunctionalTag(funcTag);
        leaf.setSecondaryTag(secondaryTag);
        leaf.setMorphologicalTag(morphologicalTag);
        leaf.setLexeme(lexeme);
        leaf.setLemma(lemma);

        return leaf;
      }

      // try punctuation: only the level and the lexeme are set on the leaf
      Matcher punctuationMatcher = PUNCTUATION_PATTERN.matcher(line);
      if (punctuationMatcher.matches()) {
        int level = punctuationMatcher.group(1).length() + 1;
        String lexeme = punctuationMatcher.group(2);
        Leaf leaf = new Leaf();
        leaf.setLevel(level);
        leaf.setLexeme(lexeme);
        return leaf;
      }

      // process the bizarre cases: these lines carry no element and are skipped
      if (line.equals("_") || line.startsWith("<lixo") || line.startsWith("pause")) {
        return null;
      }

      if (line.startsWith("=")) {
        Matcher bizarreLeafMatcher = BIZARRE_LEAF_PATTERN.matcher(line);
        if (bizarreLeafMatcher.matches()) {
          int level = bizarreLeafMatcher.group(1).length() + 1;
          String syntacticTag = bizarreLeafMatcher.group(2);
          String lemma = bizarreLeafMatcher.group(3);
          String morphologicalTag = bizarreLeafMatcher.group(4);
          String lexeme = bizarreLeafMatcher.group(5);
          Leaf leaf = new Leaf();
          leaf.setLevel(level);
          leaf.setSyntacticTag(syntacticTag);
          leaf.setMorphologicalTag(morphologicalTag);
          leaf.setLexeme(lexeme);
          if (lemma != null) {
            // Drop the first and last character of the lemma
            // (presumably the surrounding quotes captured by the pattern).
            if (lemma.length() > 2) {
              lemma = lemma.substring(1, lemma.length() - 1);
            }
            leaf.setLemma(lemma);
          }

          return leaf;
        }

        // Unstructured "=..." line: everything up to and including the last '=' is
        // treated as the prefix, one further character is skipped, and the rest is
        // taken as the lexeme.
        // NOTE(review): the skipped character is presumably a separator -- confirm
        // against the corpus format.
        int prefixLength = line.lastIndexOf("=") + 1;
        int lexemeStart = prefixLength + 1;

        // Guard: when the last '=' is the final character of the line, lexemeStart
        // lies beyond the end of the string and substring() would throw. Such a line
        // is left to the generic "couldn't parse" fallback below instead.
        if (lexemeStart <= line.length()) {
          String lexeme = line.substring(lexemeStart);

          if (lexeme.matches("\\w.*?[\\.<>].*")) {
            return null;
          }

          Leaf leaf = new Leaf();
          leaf.setLevel(prefixLength + 1);
          leaf.setSyntacticTag("");
          leaf.setMorphologicalTag("");
          leaf.setFunctionalTag("");
          leaf.setLexeme(lexeme);

          return leaf;
        }
      }

      // Last resort: keep the raw line as the lexeme of a top-level leaf.
      logger.warn("Couldn't parse leaf: {}", line);
      Leaf leaf = new Leaf();
      leaf.setLevel(1);
      leaf.setSyntacticTag("");
      leaf.setMorphologicalTag("");
      leaf.setFunctionalTag("");
      leaf.setLexeme(line);

      return leaf;
    }