in ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEAnaforaXMLReader.java [139:381]
/**
 * Reads one Anafora XML file and adds the entities and relations it describes to the CAS.
 *
 * <p>For every {@code annotations} element, each {@code entity} child is turned into an
 * {@link EventMention}, {@link TimeMention} or {@link Markable} (entities of type
 * {@code DUPLICATE} are skipped), and each {@code relation} child is turned into a temporal,
 * aspectual or coreference relation between previously created entities. Recoverable problems
 * are reported through {@code error(...)} and the offending item is skipped or defaulted.
 *
 * @param jCas    the CAS whose document text the annotation offsets refer to
 * @param xmlFile the Anafora XML file to read
 * @throws AnalysisEngineProcessException if the XML file cannot be read or parsed
 * @throws UnsupportedOperationException  if an entity or relation has an unknown type
 */
private static void processXmlFile(JCas jCas, File xmlFile) throws AnalysisEngineProcessException {
  // load the XML
  Element dataElem;
  try {
    dataElem = new SAXBuilder().build(xmlFile.toURI().toURL()).getRootElement();
  } catch (JDOMException | IOException e) {
    // MalformedURLException is an IOException, so it is covered by this clause too
    throw new AnalysisEngineProcessException(e);
  }

  int curEventId = 1;
  int curTimexId = 1;
  int curRelId = 1;
  String docText = jCas.getDocumentText();
  int docLen = docText.length();

  for (Element annotationsElem : dataElem.getChildren("annotations")) {
    Map<String, Annotation> idToAnnotation = Maps.newHashMap();

    for (Element entityElem : annotationsElem.getChildren("entity")) {
      String id = removeSingleChildText(entityElem, "id", null);
      Element spanElem = removeSingleChild(entityElem, "span", id);
      String type = removeSingleChildText(entityElem, "type", id);
      Element propertiesElem = removeSingleChild(entityElem, "properties", id);

      // UIMA doesn't support disjoint spans, so take the span enclosing everything
      int[] span = parseEnclosingSpan(spanElem.getText(), docLen, id);
      if (span == null) {
        error("Illegal begin or end boundary", id);
        continue;
      }
      int begin = span[0];
      int end = span[1];

      // constant-first equals so that a missing type falls through to the "unsupported" branch
      Annotation annotation;
      if ("EVENT".equals(type)) {
        annotation = createGoldEventMention(jCas, propertiesElem, begin, end, curEventId++, id);
      } else if ("TIMEX3".equals(type) || "DOCTIME".equals(type) || "SECTIONTIME".equals(type)) {
        // TIMEX3 carries its class as a property; DOCTIME/SECTIONTIME use the type itself
        String timeClass = "TIMEX3".equals(type)
            ? removeSingleChildText(propertiesElem, "Class", id)
            : type;
        TimeMention timeMention = new TimeMention(jCas, begin, end);
        timeMention.setId(curTimexId++);
        timeMention.setTimeClass(timeClass);
        timeMention.addToIndexes();
        annotation = timeMention;
      } else if ("Markable".equals(type)) {
        // strip trailing line breaks, but never shrink past 'begin' or index before 0
        while (end > begin && (docText.charAt(end - 1) == '\n' || docText.charAt(end - 1) == '\r')) {
          end--;
        }
        Markable markable = new Markable(jCas, begin, end);
        markable.addToIndexes();
        annotation = markable;
      } else if ("DUPLICATE".equals(type)) {
        LOGGER.warn("Ignoring duplicate sections in annotations.");
        continue;
      } else {
        throw new UnsupportedOperationException("unsupported entity type: " + type);
      }

      // match the annotation to its ID for later use
      idToAnnotation.put(id, annotation);

      // make sure all XML has been consumed
      removeSingleChild(entityElem, "parentsType", id);
      reportUnprocessedChildren(propertiesElem, entityElem, id);
    }

    for (Element relationElem : annotationsElem.getChildren("relation")) {
      String id = removeSingleChildText(relationElem, "id", null);
      String type = removeSingleChildText(relationElem, "type", id);
      Element propertiesElem = removeSingleChild(relationElem, "properties", id);

      if ("TLINK".equals(type)) {
        String sourceID = removeSingleChildText(propertiesElem, "Source", id);
        String targetID = removeSingleChildText(propertiesElem, "Target", id);
        String tlinkType = removeSingleChildText(propertiesElem, "Type", id);
        TemporalTextRelation relation = new TemporalTextRelation(jCas);
        relation.setId(curRelId++);
        addRelation(jCas, relation, sourceID, targetID, tlinkType, idToAnnotation, id);
      } else if ("ALINK".equals(type)) {
        String sourceID = removeSingleChildText(propertiesElem, "Source", id);
        String targetID = removeSingleChildText(propertiesElem, "Target", id);
        String alinkType = removeSingleChildText(propertiesElem, "Type", id);
        AspectualTextRelation relation = new AspectualTextRelation(jCas);
        addRelation(jCas, relation, sourceID, targetID, alinkType, idToAnnotation, id);
      } else if ("Identical".equals(type)) {
        addIdentityChain(jCas, propertiesElem, idToAnnotation, id);
      } else if ("Set/Subset".equals(type)) {
        error("This reader has not implemented reading of Set/Subset relations yet", id);
      } else if ("Whole/Part".equals(type)) {
        error("This reader has not implemented reading of Whole/Part relations yet", id);
      } else if ("Appositive".equals(type)) {
        error("This reader has not implemented reading of Appositive relations yet", id);
      } else {
        throw new UnsupportedOperationException("unsupported relation type: " + type);
      }

      // make sure all XML has been consumed
      removeSingleChild(relationElem, "parentsType", id);
      reportUnprocessedChildren(propertiesElem, relationElem, id);
    }
  }
}

/**
 * Parses a span string of the form {@code "b1,e1;b2,e2;..."} and returns the single span that
 * encloses all usable pieces. A begin offset below 0 or an end offset beyond {@code docLen} is
 * ignored; a piece that is not two integers separated by a comma is reported and skipped.
 *
 * @return {@code {begin, end}}, or {@code null} if no usable begin/end pair was found
 */
private static int[] parseEnclosingSpan(String spanText, int docLen, String id) {
  int begin = Integer.MAX_VALUE;
  int end = Integer.MIN_VALUE;
  for (String spanString : spanText.split(";")) {
    String[] beginEndStrings = spanString.split(",");
    if (beginEndStrings.length != 2) {
      error("span not of the format 'number,number'", id);
      continue; // nothing to index into; skip this piece
    }
    int spanBegin;
    int spanEnd;
    try {
      spanBegin = Integer.parseInt(beginEndStrings[0]);
      spanEnd = Integer.parseInt(beginEndStrings[1]);
    } catch (NumberFormatException e) {
      error("span not of the format 'number,number'", id);
      continue;
    }
    if (spanBegin < begin && spanBegin >= 0) {
      begin = spanBegin;
    }
    if (spanEnd > end && spanEnd <= docLen) {
      end = spanEnd;
    }
  }
  // Out-of-range offsets are never stored above, so an invalid entity shows up as a bound
  // that is still at its sentinel value or as an inverted span.
  if (begin == Integer.MAX_VALUE || end == Integer.MIN_VALUE || begin > end) {
    return null;
  }
  return new int[] { begin, end };
}

/**
 * Builds an {@link EventMention} over {@code [begin, end)} together with its {@link Event} and
 * {@link EventProperties}, consuming the event properties from {@code propertiesElem}, and adds
 * all three to the CAS indexes.
 */
private static EventMention createGoldEventMention(
    JCas jCas, Element propertiesElem, int begin, int end, int eventId, String id) {
  String docTimeRel = removeSingleChildText(propertiesElem, "DocTimeRel", id);
  if (docTimeRel == null) {
    error("no docTimeRel, assuming OVERLAP", id);
    docTimeRel = "OVERLAP";
  }
  String eventType = removeSingleChildText(propertiesElem, "Type", id);
  String degree = removeSingleChildText(propertiesElem, "Degree", id);
  String polarity = removeSingleChildText(propertiesElem, "Polarity", id);
  String contextualModality = removeSingleChildText(propertiesElem, "ContextualModality", id);
  String contextualAspect = removeSingleChildText(propertiesElem, "ContextualAspect", id);
  String permanence = removeSingleChildText(propertiesElem, "Permanence", id);

  EventMention eventMention = new EventMention(jCas, begin, end);
  Event event = new Event(jCas);
  EventProperties eventProperties = new EventProperties(jCas);
  eventProperties.setDocTimeRel(docTimeRel);
  eventProperties.setCategory(eventType);
  eventProperties.setDegree(degree);
  // constant-first equals: a missing Polarity is reported instead of causing an NPE
  if ("POS".equals(polarity)) {
    eventProperties.setPolarity(CONST.NE_POLARITY_NEGATION_ABSENT);
  } else if ("NEG".equals(polarity)) {
    eventProperties.setPolarity(CONST.NE_POLARITY_NEGATION_PRESENT);
  } else {
    error("polarity that was not POS or NEG", id);
  }
  eventProperties.setContextualModality(contextualModality);
  eventProperties.setContextualAspect(contextualAspect);
  eventProperties.setPermanence(permanence);
  eventProperties.addToIndexes();

  event.setConfidence(1.0f);
  event.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
  event.setProperties(eventProperties);
  event.setMentions(new FSArray(jCas, 1));
  event.setMentions(0, eventMention);
  event.addToIndexes();

  eventMention.setId(eventId);
  eventMention.setConfidence(1.0f);
  eventMention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
  eventMention.setEvent(event);
  eventMention.addToIndexes();
  return eventMention;
}

/**
 * Handles an {@code Identical} relation: collects the markables named by {@code FirstInstance}
 * and every {@code Coreferring_String}, links each consecutive pair with a
 * {@link CoreferenceRelation}, and, when there is more than one markable, stores the whole
 * chain as a {@link CollectionTextRelation}.
 */
private static void addIdentityChain(
    JCas jCas, Element propertiesElem, Map<String, Annotation> idToAnnotation, String id) {
  // Build list of Markables from FirstInstance and Coreferring_String annotations:
  List<Markable> markables = new ArrayList<>();
  String firstInstanceId = removeSingleChildText(propertiesElem, "FirstInstance", id);
  Markable antecedent = lookupMarkable(idToAnnotation, firstInstanceId);
  if (antecedent != null) {
    markables.add(antecedent);
  } else {
    error("Null markable as FirstInstance", id);
  }
  for (Element coref : propertiesElem.getChildren("Coreferring_String")) {
    Markable anaphor = lookupMarkable(idToAnnotation, coref.getText());
    if (anaphor != null) {
      markables.add(anaphor);
    } else {
      error("Null markable as Coreferring_String", id);
    }
  }

  // Iterate over markable list creating binary coref relations:
  for (int antInd = 0; antInd < markables.size() - 1; antInd++) {
    int anaInd = antInd + 1;
    CoreferenceRelation pair = new CoreferenceRelation(jCas);
    pair.setCategory("Identity");
    RelationArgument arg1 = new RelationArgument(jCas);
    arg1.setArgument(markables.get(antInd));
    arg1.setRole("antecedent");
    pair.setArg1(arg1);
    RelationArgument arg2 = new RelationArgument(jCas);
    arg2.setArgument(markables.get(anaInd));
    arg2.setRole("anaphor");
    pair.setArg2(arg2);
    pair.addToIndexes();
  }

  // Create FSList from markable list and add to collection text relation:
  if (markables.size() > 1) {
    CollectionTextRelation chain = new CollectionTextRelation(jCas);
    FSList list = ListFactory.buildList(jCas, markables);
    list.addToIndexes();
    chain.setMembers(list);
    chain.addToIndexes();
  } else {
    error("Coreference chain of length <= 1", id);
  }
  // the Coreferring_String children were read in place above; mark them as consumed
  propertiesElem.removeChildren("Coreferring_String");
}

/**
 * Returns the {@link Markable} registered under {@code mentionId}, or {@code null} when the id
 * is unknown or refers to an annotation that is not a markable.
 */
private static Markable lookupMarkable(Map<String, Annotation> idToAnnotation, String mentionId) {
  Annotation candidate = idToAnnotation.get(mentionId);
  return candidate instanceof Markable ? (Markable) candidate : null;
}

/**
 * Reports the names of any child elements still attached to {@code propertiesElem} or
 * {@code parentElem}, i.e. XML content that the reader did not consume.
 */
private static void reportUnprocessedChildren(Element propertiesElem, Element parentElem, String id) {
  if (propertiesElem.getChildren().isEmpty() && parentElem.getChildren().isEmpty()) {
    return;
  }
  List<String> children = new ArrayList<>();
  for (Element child : propertiesElem.getChildren()) {
    children.add(child.getName());
  }
  for (Element child : parentElem.getChildren()) {
    children.add(child.getName());
  }
  error("unprocessed children " + children, id);
}