private static void processXmlFile()

in ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java [139:381]


  /**
   * Parses one Anafora XML file and adds the annotations it describes to the given CAS.
   *
   * <p>For every {@code annotations} element under the XML root, two passes are made:
   * <ol>
   *   <li>each {@code entity} child becomes an {@link EventMention} (with its {@link Event} and
   *       {@link EventProperties}), a {@link TimeMention} or a {@link Markable}, and is remembered
   *       under its XML id;</li>
   *   <li>each {@code relation} child is resolved against those ids and becomes a temporal,
   *       aspectual or coreference relation.</li>
   * </ol>
   * Child elements are removed from the XML tree as they are read, so anything still present
   * afterwards is reported as unprocessed.
   *
   * <p>Data problems are reported through {@code error(...)}, which is defined elsewhere in this
   * class; the code below is written so that it stays correct whether that helper returns or throws.
   *
   * @param jCas the CAS to populate; its document text is used to validate character offsets
   * @param xmlFile the Anafora XML file to read
   * @throws AnalysisEngineProcessException if the XML file cannot be located, read or parsed
   * @throws UnsupportedOperationException if an entity or relation has a type this reader does not
   *         know about
   */
  private static void processXmlFile(JCas jCas, File xmlFile) throws AnalysisEngineProcessException{
    // load the XML
    Element dataElem;
    try {
      dataElem = new SAXBuilder().build(xmlFile.toURI().toURL()).getRootElement();
    } catch (JDOMException | IOException e) {
      // MalformedURLException is a subclass of IOException, so it is covered here as well
      throw new AnalysisEngineProcessException(e);
    }

    // running ids, shared across all <annotations> elements of the file
    int curEventId = 1;
    int curTimexId = 1;
    int curRelId = 1;
    String docText = jCas.getDocumentText();
    int docLen = docText.length();

    for (Element annotationsElem : dataElem.getChildren("annotations")) {

      // ---- pass 1: entities -------------------------------------------------------------
      Map<String, Annotation> idToAnnotation = Maps.newHashMap();
      for (Element entityElem : annotationsElem.getChildren("entity")) {
        String id = removeSingleChildText(entityElem, "id", null);
        Element spanElem = removeSingleChild(entityElem, "span", id);
        String type = removeSingleChildText(entityElem, "type", id);
        Element propertiesElem = removeSingleChild(entityElem, "properties", id);

        // UIMA doesn't support disjoint spans, so take the span enclosing
        // everything. MAX_VALUE / MIN_VALUE act as "nothing valid seen yet" markers.
        int begin = Integer.MAX_VALUE;
        int end = Integer.MIN_VALUE;
        for (String spanString : spanElem.getText().split(";")) {
          String[] beginEndStrings = spanString.split(",");
          if (beginEndStrings.length != 2) {
            error("span not of the format 'number,number'", id);
            if (beginEndStrings.length < 2) {
              // there is no end offset to read; skip this fragment instead of indexing past the array
              continue;
            }
          }
          int spanBegin = Integer.parseInt(beginEndStrings[0]);
          int spanEnd = Integer.parseInt(beginEndStrings[1]);
          // offsets outside the document are ignored rather than widening the span
          if (spanBegin < begin && spanBegin >= 0) {
            begin = spanBegin;
          }
          if (spanEnd > end && spanEnd <= docLen) {
            end = spanEnd;
          }
        }
        // Out-of-range offsets never reach begin/end (see the guards above), so an unusable span
        // shows up as an untouched marker value or as an inverted range.
        if (begin == Integer.MAX_VALUE || end == Integer.MIN_VALUE || begin > end) {
          error("Illegal begin or end boundary", id);
          continue;
        }

        // String constants go first in the comparisons below so that a null type or property
        // falls through to the regular error handling instead of a NullPointerException.
        Annotation annotation;
        if ("EVENT".equals(type)) {
          String docTimeRel = removeSingleChildText(propertiesElem, "DocTimeRel", id);
          if (docTimeRel == null) {
            error("no docTimeRel, assuming OVERLAP", id);
            docTimeRel = "OVERLAP";
          }
          String eventType = removeSingleChildText(propertiesElem, "Type", id);
          String degree = removeSingleChildText(propertiesElem, "Degree", id);
          String polarity = removeSingleChildText(propertiesElem, "Polarity", id);
          String contextualModality = removeSingleChildText(propertiesElem, "ContextualModality", id);
          String contextualAspect = removeSingleChildText(propertiesElem, "ContextualAspect", id);
          String permanence = removeSingleChildText(propertiesElem, "Permanence", id);
          EventMention eventMention = new EventMention(jCas, begin, end);
          Event event = new Event(jCas);
          EventProperties eventProperties = new EventProperties(jCas);
          eventProperties.setDocTimeRel(docTimeRel);
          eventProperties.setCategory(eventType);
          eventProperties.setDegree(degree);
          if ("POS".equals(polarity)) {
            eventProperties.setPolarity(CONST.NE_POLARITY_NEGATION_ABSENT);
          } else if ("NEG".equals(polarity)) {
            eventProperties.setPolarity(CONST.NE_POLARITY_NEGATION_PRESENT);
          } else {
            // the polarity feature is left unset in this case
            error("polarity that was not POS or NEG", id);
          }
          eventProperties.setContextualModality(contextualModality);
          eventProperties.setContextualAspect(contextualAspect);
          eventProperties.setPermanence(permanence);
          eventProperties.addToIndexes();
          event.setConfidence(1.0f);
          event.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
          event.setProperties(eventProperties);
          event.setMentions(new FSArray(jCas, 1));
          event.setMentions(0, eventMention);
          event.addToIndexes();
          eventMention.setId(curEventId++);
          eventMention.setConfidence(1.0f);
          eventMention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
          eventMention.setEvent(event);
          eventMention.addToIndexes();
          annotation = eventMention;

        } else if ("TIMEX3".equals(type)) {
          String timeClass = removeSingleChildText(propertiesElem, "Class", id);
          TimeMention timeMention = new TimeMention(jCas, begin, end);
          timeMention.setId(curTimexId++);
          timeMention.setTimeClass(timeClass);
          timeMention.addToIndexes();
          annotation = timeMention;

        } else if ("DOCTIME".equals(type) || "SECTIONTIME".equals(type)) {
          // these carry no "Class" property; the entity type itself is used as the time class
          TimeMention timeMention = new TimeMention(jCas, begin, end);
          timeMention.setId(curTimexId++);
          timeMention.setTimeClass(type);
          timeMention.addToIndexes();
          annotation = timeMention;

        } else if ("Markable".equals(type)) {
          // Strip trailing line breaks from the span. The loop stops at end == begin so it never
          // looks at a character before the span and never yields end < begin.
          while (end > begin && (docText.charAt(end - 1) == '\n' || docText.charAt(end - 1) == '\r')) {
            end--;
          }
          Markable markable = new Markable(jCas, begin, end);
          markable.addToIndexes();
          annotation = markable;

        } else if ("DUPLICATE".equals(type)) {
          LOGGER.warn("Ignoring duplicate sections in annotations.");
          continue;
        } else {
          throw new UnsupportedOperationException("unsupported entity type: " + type);
        }

        // match the annotation to its ID for later use
        idToAnnotation.put(id, annotation);

        // make sure all XML has been consumed
        removeSingleChild(entityElem, "parentsType", id);
        if (!propertiesElem.getChildren().isEmpty() || !entityElem.getChildren().isEmpty()) {
          List<String> children = Lists.newArrayList();
          for (Element child : propertiesElem.getChildren()) {
            children.add(child.getName());
          }
          for (Element child : entityElem.getChildren()) {
            children.add(child.getName());
          }
          error("unprocessed children " + children, id);
        }
      }

      // ---- pass 2: relations ------------------------------------------------------------
      for (Element relationElem : annotationsElem.getChildren("relation")) {
        String id = removeSingleChildText(relationElem, "id", null);
        String type = removeSingleChildText(relationElem, "type", id);
        Element propertiesElem = removeSingleChild(relationElem, "properties", id);

        if ("TLINK".equals(type)) {
          String sourceID = removeSingleChildText(propertiesElem, "Source", id);
          String targetID = removeSingleChildText(propertiesElem, "Target", id);
          String tlinkType = removeSingleChildText(propertiesElem, "Type", id);
          TemporalTextRelation relation = new TemporalTextRelation(jCas);
          relation.setId(curRelId++);
          addRelation(jCas, relation, sourceID, targetID, tlinkType, idToAnnotation, id);

        } else if ("ALINK".equals(type)) {
          String sourceID = removeSingleChildText(propertiesElem, "Source", id);
          String targetID = removeSingleChildText(propertiesElem, "Target", id);
          String alinkType = removeSingleChildText(propertiesElem, "Type", id);
          // NOTE(review): unlike TLINKs, ALINK relations are not assigned an id here - confirm intended
          AspectualTextRelation relation = new AspectualTextRelation(jCas);
          addRelation(jCas, relation, sourceID, targetID, alinkType, idToAnnotation, id);

        } else if ("Identical".equals(type)) {
          // Build list of Markables from FirstInstance and Coreferring_String annotations.
          // Ids are checked with instanceof so that a reference to a non-Markable entity is
          // reported instead of failing with a ClassCastException.
          String mention = removeSingleChildText(propertiesElem, "FirstInstance", id);
          List<Markable> markables = new ArrayList<>();
          Annotation antecedent = idToAnnotation.get(mention);
          if (antecedent instanceof Markable) {
            markables.add((Markable) antecedent);
          } else if (antecedent == null) {
            error("Null markable as FirstInstance", id);
          } else {
            error("Non-markable annotation as FirstInstance", id);
          }
          List<Element> corefs = propertiesElem.getChildren("Coreferring_String");
          for (Element coref : corefs) {
            mention = coref.getText();
            Annotation anaphor = idToAnnotation.get(mention);
            if (anaphor instanceof Markable) {
              markables.add((Markable) anaphor);
            } else if (anaphor == null) {
              error("Null markable as Coreferring_String", id);
            } else {
              error("Non-markable annotation as Coreferring_String", id);
            }
          }
          // Iterate over markable list creating binary coref relations between neighbours:
          for (int antInd = 0; antInd < markables.size() - 1; antInd++) {
            int anaInd = antInd + 1;
            // create set of binary relations from chain elements:
            CoreferenceRelation pair = new CoreferenceRelation(jCas);
            pair.setCategory("Identity");
            RelationArgument arg1 = new RelationArgument(jCas);
            arg1.setArgument(markables.get(antInd));
            arg1.setRole("antecedent");
            pair.setArg1(arg1);
            RelationArgument arg2 = new RelationArgument(jCas);
            arg2.setArgument(markables.get(anaInd));
            arg2.setRole("anaphor");
            pair.setArg2(arg2);
            pair.addToIndexes();
          }
          // Create FSList from markable list and add to collection text relation:
          if (markables.size() > 1) {
            CollectionTextRelation chain = new CollectionTextRelation(jCas);
            FSList list = ListFactory.buildList(jCas, markables);
            list.addToIndexes();
            chain.setMembers(list);
            chain.addToIndexes();
          } else {
            error("Coreference chain of length <= 1", id);
          }
          // the Coreferring_String children were read in place above; drop them so the
          // "unprocessed children" check below does not report them
          propertiesElem.removeChildren("Coreferring_String");
        } else if ("Set/Subset".equals(type)) {
          error("This reader has not implemented reading of Set/Subset relations yet", id);

        } else if ("Whole/Part".equals(type)) {
          error("This reader has not implemented reading of Whole/Part relations yet", id);

        } else if ("Appositive".equals(type)) {
          error("This reader has not implemented reading of Appositive relations yet", id);

        } else {
          throw new UnsupportedOperationException("unsupported relation type: " + type);
        }

        // make sure all XML has been consumed
        removeSingleChild(relationElem, "parentsType", id);
        if (!propertiesElem.getChildren().isEmpty() || !relationElem.getChildren().isEmpty()) {
          List<String> children = Lists.newArrayList();
          for (Element child : propertiesElem.getChildren()) {
            children.add(child.getName());
          }
          for (Element child : relationElem.getChildren()) {
            children.add(child.getName());
          }
          error("unprocessed children " + children, id);
        }
      }
    }
  }