public Sentence parse()

in opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceStream.java [88:208]


    /**
     * Parses one sentence block into a {@link Sentence} whose tree hangs off a
     * synthetic {@code ROOT} node at level 0.
     *
     * <p>The block is consumed line by line:
     * <ol>
     *   <li>Lines are skipped until one starts with {@code SOURCE} or equals
     *       {@code &&}.</li>
     *   <li>After a {@code SOURCE} line, the next line carries the sentence: the
     *       part after the first space becomes the text (trimmed and passed through
     *       {@code fixPunctuation}); the part before it, followed by
     *       {@code " p=" + para}, the optional {@code " title"} / {@code " box"}
     *       tags and the remainder of the {@code SOURCE} line, becomes the
     *       metadata. If that first space is missing or is the first character,
     *       the metadata is left untouched.</li>
     *   <li>After a {@code &&} line, the {@code text} and {@code meta} values
     *       already held outside this method are reused as they are (presumably
     *       those of the previously parsed sentence).</li>
     *   <li>Lines starting with {@code ###} are skipped, then every following line
     *       is turned into a tree element via {@code getElement} until an empty
     *       line, a line starting with {@code </s>}, a {@code &&} line or the end
     *       of the input is reached.</li>
     * </ol>
     *
     * @param sentenceString the raw lines of one sentence block
     * @param para value written into the metadata as {@code p=<para>}
     * @param isTitle if {@code true}, {@code " title"} is added to the metadata
     * @param isBox if {@code true}, {@code " box"} is added to the metadata
     * @return the parsed sentence, or {@code null} if the input holds neither a
     *         {@code SOURCE} nor a {@code &&} line, if the sentence line after
     *         {@code SOURCE} is missing, or if any exception is raised while
     *         parsing (a warning is logged in the latter two cases)
     */
    public Sentence parse(String sentenceString, int para, boolean isTitle, boolean isBox) {
      Node root = new Node();
      try (BufferedReader reader = new BufferedReader(new StringReader(sentenceString))) {
        // first line is <s ...>
        String line = reader.readLine();

        // set when the block is an alternative parse suggestion (&&) of the same sentence
        boolean useSameTextAndMeta = false;

        // look for the SOURCE line
        while (line != null && !line.startsWith("SOURCE")) {
          if (line.equals("&&")) {
            // same sentence again!
            useSameTextAndMeta = true;
            break;
          }
          line = reader.readLine();
        }
        if (line == null) {
          // empty input, or the input ended before a SOURCE / && line showed up
          return null;
        }

        if (!useSameTextAndMeta) {
          // got source, get the metadata
          String metaFromSource = line.substring(7);
          line = reader.readLine();
          if (line == null) {
            logger.warn("Missing sentence line after SOURCE for the given sentence: '{}'",
                sentenceString);
            return null;
          }
          // we should have the plain sentence
          // we remove the first token
          int start = line.indexOf(" ");
          text = line.substring(start + 1).trim();
          text = fixPunctuation(text);
          String titleTag = "";
          if (isTitle) titleTag = " title";
          String boxTag = "";
          if (isBox) boxTag = " box";
          if (start > 0) {
            meta = line.substring(0, start) + " p=" + para + titleTag + boxTag + metaFromSource;
          }
          // otherwise there is no leading token to use, so meta keeps its current value
        }
        Sentence sentence = new Sentence(text, root, meta);

        // now we look for the root node
        do {
          line = reader.readLine();
        } while (line != null && line.startsWith("###")); // skip lines starting with ###

        // got the root. Add it to the stack
        Stack<Node> nodeStack = new Stack<>();

        root.setSyntacticTag("ROOT");
        root.setLevel(0);
        nodeStack.add(root);

        /* The stack holds the nodes that may still become the parent of the elements
        that follow (nodes and leafs); the level of each new element decides how far
        the stack is unwound before the element is attached. */

        while (line != null && !line.isEmpty() && !line.startsWith("</s>") && !line.equals("&&")) {
          TreeElement element = this.getElement(line);

          if (element != null) {
            // 1) When we get a new element, we check its level and remove from
            // the top of the stack nodes that are brothers or nephews.
            while (!nodeStack.isEmpty() && element.getLevel() > 0
                && element.getLevel() <= nodeStack.peek().getLevel()) {
              nodeStack.pop();
            }

            if (element.isLeaf()) {
              if (nodeStack.isEmpty()) {
                // 2a) If the element is a leaf and there is no parent candidate,
                // add it as a daughter of the root.
                root.addElement(element);
              } else if (element.level == 0) {
                // 2b) Level 0 leafs go to the bottom of the stack (the root).
                nodeStack.firstElement().addElement(element);
              } else {
                // 2c) Walk the candidates from the top of the stack down and take the
                // first one with a lower level; fall back to the bottom of the stack.
                Node parent = nodeStack.firstElement();
                for (int index = nodeStack.size() - 1; index >= 0; index--) {
                  Node candidate = nodeStack.get(index);
                  if (candidate.getLevel() < element.getLevel()) {
                    parent = candidate;
                    break;
                  }
                }
                parent.addElement(element);
              }
            } else {
              // 3) Check if the element that is at the top of the stack is this
              // node parent, if yes add it as a son
              if (!nodeStack.isEmpty() && nodeStack.peek().getLevel() < element.getLevel()) {
                nodeStack.peek().addElement(element);
              } else {
                logger.warn("should not happen!");
              }
              // 4) Add it to the stack so it is a parent candidate.
              nodeStack.push((Node) element);
            }
          }
          line = reader.readLine();
        }

        return sentence;
      } catch (Exception e) {
        logger.warn("Caught exception for the given sentence: '{}'", sentenceString, e);
        return null;
      }
    }