protected void addParseEvents()

in opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java [140:366]


  /**
   * Generates training events for one parse by replaying, chunk by chunk from left to right,
   * the build / check / attach decisions that lead to the completed parse.
   *
   * <p>Two views of the parse are kept in step:
   * <ul>
   *   <li>{@code chunks} and {@code rightFrontier} refer to nodes of the completed parse,
   *       which already have all of their children;</li>
   *   <li>{@code currentChunks} and the frontier obtained from {@code Parser.getRightFrontier}
   *       refer to freshly created nodes that only have the children attached so far.</li>
   * </ul>
   *
   * <p>An event is appended to {@code parseEvents} only when its stage matches the
   * {@code etype} of this stream ({@code BUILD}, {@code CHECK} or {@code ATTACH}); the
   * bookkeeping on the partially built parse is performed regardless of {@code etype}.
   *
   * <p>This method mutates its input: labels are set on the supplied chunks and on their
   * ancestors, and entries of {@code chunks} are replaced by the parent nodes that were built
   * on top of them.
   *
   * @param parseEvents the list to which the generated events are appended.
   * @param chunks the chunk nodes of the completed parse, in sentence order.
   * @throws IllegalStateException if the right frontier of the partially built parse and the
   *     tracked right frontier of the completed parse differ in size.
   * @throws RuntimeException if no attachment site is found for a chunk other than the first.
   */
  protected void addParseEvents(List<Event> parseEvents, Parse[] chunks) {
    /* Frontier nodes built from node in a completed parse.  Specifically,
     * they have all their children regardless of the stage of parsing.*/
    List<Parse> rightFrontier = new ArrayList<>();
    /* Completed-parse nodes built for the chunk currently being processed; they are pushed
     * onto the front of rightFrontier once that chunk has been attached.*/
    List<Parse> builtNodes = new ArrayList<>();
    /* Nodes which characterize what the parse looks like to the parser as it's being built.
     * Specifically, these nodes don't have all their children attached like the parents of
     * the chunk nodes do.*/
    Parse[] currentChunks = new Parse[chunks.length];
    for (int ci = 0; ci < chunks.length; ci++) {
      currentChunks[ci] = (Parse) chunks[ci].clone();
      currentChunks[ci].setPrevPunctuation(chunks[ci].getPreviousPunctuationSet());
      currentChunks[ci].setNextPunctuation(chunks[ci].getNextPunctuationSet());
      currentChunks[ci].setLabel(Parser.COMPLETE);
      chunks[ci].setLabel(Parser.COMPLETE);
    }
    for (int ci = 0; ci < chunks.length; ci++) {
      Parse parent = chunks[ci].getParent();
      Parse prevParent = chunks[ci];
      // insertion position in builtNodes; nodes are added in the order they are built.
      int off = 0;
      //build un-built parents
      if (!chunks[ci].isPosTag()) {
        builtNodes.add(off++,chunks[ci]);
      }
      //perform build stages
      // Walk up the completed parse until the top node or an already labeled ancestor is reached.
      while (!parent.getType().equals(AbstractBottomUpParser.TOP_NODE) && parent.getLabel() == null) {
        // A parent with the same type as the node below it does not get a build event of its own.
        if (!prevParent.getType().equals(parent.getType())) {
          //build level
          if (logger.isDebugEnabled()) {
            logger.debug("Build: {} for: {}", parent.getType(), currentChunks[ci]);
          }
          if (etype == ParserEventTypeEnum.BUILD) {
            parseEvents.add(new Event(parent.getType(),
                buildContextGenerator.getContext(currentChunks, ci)));
          }
          builtNodes.add(off++,parent);
          // Mirror the build in the partial parse: a new node of the parent's type whose only
          // child so far is the current chunk.
          Parse newParent = new Parse(currentChunks[ci].getText(),
              currentChunks[ci].getSpan(),parent.getType(),1,0);
          newParent.add(currentChunks[ci],rules);
          newParent.setPrevPunctuation(currentChunks[ci].getPreviousPunctuationSet());
          newParent.setNextPunctuation(currentChunks[ci].getNextPunctuationSet());
          currentChunks[ci].setParent(newParent);
          currentChunks[ci] = newParent;
          newParent.setLabel(Parser.BUILT);
          //see if chunk is complete
          if (lastChild(chunks[ci], parent)) {
            if (etype == ParserEventTypeEnum.CHECK) {
              parseEvents.add(new Event(Parser.COMPLETE,
                  checkContextGenerator.getContext(currentChunks[ci],currentChunks, ci,false)));
            }
            currentChunks[ci].setLabel(Parser.COMPLETE);
            parent.setLabel(Parser.COMPLETE);
          }
          else {
            if (etype == ParserEventTypeEnum.CHECK) {
              parseEvents.add(new Event(Parser.INCOMPLETE,
                  checkContextGenerator.getContext(currentChunks[ci],currentChunks,ci,false)));
            }
            currentChunks[ci].setLabel(Parser.INCOMPLETE);
            parent.setLabel(Parser.COMPLETE);
          }

          chunks[ci] = parent;
        }
        //TODO: Consider whether we need to set this label or train parses at all.
        // Note: this overwrites the label assigned to parent in the branch above.
        parent.setLabel(Parser.BUILT);
        prevParent = parent;
        parent = parent.getParent();
      }
      //decide to attach
      if (etype == ParserEventTypeEnum.BUILD) {
        parseEvents.add(new Event(Parser.DONE, buildContextGenerator.getContext(currentChunks, ci)));
      }
      //attach node
      String attachType = null;
      /* Node selected for attachment. */
      Parse attachNode = null;
      int attachNodeIndex = -1;
      if (ci == 0) {
        // The first chunk has nothing to attach to; it is inserted under a new top node
        // spanning the whole text.
        Parse top = new Parse(currentChunks[ci].getText(),
            new Span(0,currentChunks[ci].getText().length()),AbstractBottomUpParser.TOP_NODE,1,0);
        top.insert(currentChunks[ci]);
      }
      else {
        /* Right frontier consisting of partially-built nodes based on current state of the parse.*/
        List<Parse> currentRightFrontier = Parser.getRightFrontier(currentChunks[0],punctSet);
        if (currentRightFrontier.size() != rightFrontier.size()) {
          logger.error("Frontiers mis-aligned: {} != {} {} {}", currentRightFrontier.size(),
              rightFrontier.size(), currentRightFrontier, rightFrontier);
          // Fail with an exception rather than terminating the JVM so that callers of this
          // library can handle the inconsistency.
          throw new IllegalStateException("Frontiers mis-aligned: " + currentRightFrontier.size()
              + " != " + rightFrontier.size());
        }
        Map<Parse, Integer> parents = getNonAdjoinedParent(chunks[ci]);
        //try daughters first.
        for (int cfi = 0; cfi < currentRightFrontier.size(); cfi++) {
          // The two frontiers are index-aligned: frontierNode is the completed-parse node,
          // cfn the partially built node at the same position.
          Parse frontierNode = rightFrontier.get(cfi);
          Parse cfn = currentRightFrontier.get(cfi);
          if (!Parser.checkComplete || !Parser.COMPLETE.equals(cfn.getLabel())) {
            Integer i = parents.get(frontierNode);
            if (logger.isDebugEnabled()) {
              logger.debug("Looking at attachment site ({}): {} ci={} cs={}, {} :for {} {} -> {}",
                  cfi, cfn.getType(), i, nonPunctChildCount(cfn), cfn, currentChunks[ci].getType(),
                  currentChunks[ci], parents
              );
            }

            // Attach as daughter to the first frontier node for which the index recorded in
            // parents equals the number of non-punctuation children it has so far.
            if (attachNode == null &&  i != null && i == nonPunctChildCount(cfn)) {
              attachType = Parser.ATTACH_DAUGHTER;
              attachNodeIndex = cfi;
              attachNode = cfn;
              if (etype == ParserEventTypeEnum.ATTACH) {
                parseEvents.add(new Event(attachType, attachContextGenerator.getContext(currentChunks,
                    ci, currentRightFrontier, attachNodeIndex)));
              }
            }
          }
          else {
            if (logger.isDebugEnabled()) {
              logger.debug("Skipping ({}): {},{} {} :for {} {} -> {}",
                  cfi, cfn.getType(), cfn.getPreviousPunctuationSet(), cfn,
                  currentChunks[ci].getType(), currentChunks[ci], parents);
            }
          }
          // Can't attach past first incomplete node.
          if (Parser.checkComplete && cfn.getLabel().equals(Parser.INCOMPLETE)) {
            if (logger.isDebugEnabled()) {
              logger.debug("breaking on incomplete: {} {}",  cfn.getType(), cfn);
            }
            break;
          }
        }
        //try sisters, and generate non-attach events.
        for (int cfi = 0; cfi < currentRightFrontier.size(); cfi++) {
          Parse frontierNode = rightFrontier.get(cfi);
          Parse cfn = currentRightFrontier.get(cfi);
          // Sister attachment is only considered when no daughter site was selected above.
          if (attachNode == null && parents.containsKey(frontierNode.getParent())
              && frontierNode.getType().equals(frontierNode.getParent().getType())
              ) { //&& frontierNode.getParent().getLabel() == null) {
            attachType = Parser.ATTACH_SISTER;
            attachNode = cfn;
            attachNodeIndex = cfi;
            if (etype == ParserEventTypeEnum.ATTACH) {
              parseEvents.add(new Event(Parser.ATTACH_SISTER,
                  attachContextGenerator.getContext(currentChunks, ci, currentRightFrontier, cfi)));
            }
            chunks[ci].getParent().setLabel(Parser.BUILT);
          }
          else if (cfi == attachNodeIndex) {
            //skip over previously attached daughter.
          }
          else {
            // Every other frontier position examined is a negative example.
            if (etype == ParserEventTypeEnum.ATTACH) {
              parseEvents.add(new Event(Parser.NON_ATTACH,
                  attachContextGenerator.getContext(currentChunks, ci, currentRightFrontier, cfi)));
            }
          }
          //Can't attach past first incomplete node.
          if (Parser.checkComplete && cfn.getLabel().equals(Parser.INCOMPLETE)) {
            if (logger.isDebugEnabled()) {
              logger.debug("breaking on incomplete: {} {}",  cfn.getType(), cfn);
            }
            break;
          }
        }
        //attach Node
        if (attachNode != null) {
          if (Parser.ATTACH_DAUGHTER.equals(attachType)) {
            Parse daughter = currentChunks[ci];
            if (logger.isDebugEnabled()) {
              logger.debug("daughter attach a={}:{} d={} com={}", attachNode.getType(),
                  attachNode, daughter, lastChild(chunks[ci], rightFrontier.get(attachNodeIndex)));
            }

            attachNode.add(daughter,rules);
            daughter.setParent(attachNode);
            if (lastChild(chunks[ci], rightFrontier.get(attachNodeIndex))) {
              if (etype == ParserEventTypeEnum.CHECK) {
                parseEvents.add(new Event(Parser.COMPLETE,
                    checkContextGenerator.getContext(attachNode,currentChunks,ci,true)));
              }
              attachNode.setLabel(Parser.COMPLETE);
            }
            else {
              // The label of attachNode is left unchanged in this case.
              if (etype == ParserEventTypeEnum.CHECK) {
                parseEvents.add(new Event(Parser.INCOMPLETE,
                    checkContextGenerator.getContext(attachNode,currentChunks,ci,true)));
              }
            }
          }
          else if (Parser.ATTACH_SISTER.equals(attachType)) {
            // In the completed parse the adjoined node is the parent of the frontier node,
            // so the tracked frontier is moved up to that parent.
            Parse frontierNode = rightFrontier.get(attachNodeIndex);
            rightFrontier.set(attachNodeIndex,frontierNode.getParent());
            Parse sister = currentChunks[ci];
            if (logger.isDebugEnabled()) {
              logger.debug("sister attach a={}:{} s={} ap={} com={}", attachNode.getType(),
                  attachNode, sister, attachNode.getParent(),
                  lastChild(chunks[ci], rightFrontier.get(attachNodeIndex)));
            }

            Parse newParent = attachNode.getParent().adjoin(sister,rules);

            newParent.setParent(attachNode.getParent());
            attachNode.setParent(newParent);
            sister.setParent(newParent);
            // Keep currentChunks[0] pointing at the node from which the right frontier is computed.
            if (attachNode == currentChunks[0]) {
              currentChunks[0] = newParent;
            }
            if (lastChild(chunks[ci], rightFrontier.get(attachNodeIndex))) {
              if (etype == ParserEventTypeEnum.CHECK) {
                parseEvents.add(new Event(Parser.COMPLETE,
                    checkContextGenerator.getContext(newParent,currentChunks,ci,true)));
              }
              newParent.setLabel(Parser.COMPLETE);
            }
            else {
              if (etype == ParserEventTypeEnum.CHECK) {
                parseEvents.add(new Event(Parser.INCOMPLETE,
                    checkContextGenerator.getContext(newParent,currentChunks,ci,true)));
              }
              newParent.setLabel(Parser.INCOMPLETE);
            }

          }
          //update right frontier
          // Drop the frontier entries that precede the attachment site.
          for (int ni = 0; ni < attachNodeIndex; ni++) {
            rightFrontier.remove(0);
          }
        }
        else {
          throw new RuntimeException("No Attachment: " + chunks[ci]);
        }
      }
      // Nodes built for this chunk go to the front of the frontier for the next chunk.
      rightFrontier.addAll(0,builtNodes);
      builtNodes.clear();
    }
  }