in ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/THYMEQAAnaforaXMLReader.java [88:275]
/**
 * Reads an annotation XML file and adds the annotations it describes to the CAS.
 *
 * <p>For every {@code annotations} element under the XML root this method:
 * <ol>
 *   <li>creates an {@link EventMention} (with its {@link Event} and {@link EventProperties})
 *       for each {@code EVENT} entity and a {@link TimeMention} for each {@code TIMEX3}
 *       entity, using the single span that encloses all of the entity's span pieces;</li>
 *   <li>creates a {@link TemporalTextRelation} for each {@code TLINK} relation and an
 *       {@link AspectualTextRelation} for each {@code ALINK} relation via {@code addRelation};</li>
 *   <li>creates one {@link CollectionTextRelation} per {@code Question} relation whose category
 *       is the question text plus its confidence and difficulty, and whose members are the
 *       entities/relations referenced by the question's {@code Answer} children.</li>
 * </ol>
 *
 * <p>Entities whose enclosing span lies outside the document text are reported through
 * {@code error(...)} and skipped.
 *
 * @param jCas    the CAS that receives the annotations; its document text must already be set
 * @param xmlFile the annotation XML file to read
 * @throws AnalysisEngineProcessException if the XML file cannot be read or parsed
 * @throws UnsupportedOperationException  if an entity type is not {@code EVENT}/{@code TIMEX3}
 *                                        or a relation type is not
 *                                        {@code TLINK}/{@code ALINK}/{@code Question}
 */
private static void processXmlFile(JCas jCas, File xmlFile) throws AnalysisEngineProcessException {
  // load the XML
  Element dataElem;
  try {
    dataElem = new SAXBuilder().build(xmlFile.toURI().toURL()).getRootElement();
  } catch (JDOMException | IOException e) {
    // MalformedURLException is a subclass of IOException, so it is handled here too
    throw new AnalysisEngineProcessException(e);
  }

  int curEventId = 1;
  int curTimexId = 1;
  int curRelId = 1;
  int docLen = jCas.getDocumentText().length();

  for (Element annotationsElem : dataElem.getChildren("annotations")) {
    // ids are resolved per <annotations> element, so all lookup tables (including the
    // pending questions) are scoped to a single iteration of this loop
    Map<String, Annotation> idToAnnotation = Maps.newHashMap();
    Map<String, BinaryTextRelation> idToRelation = Maps.newHashMap();
    Map<String, List<String>> questionRelations = Maps.newHashMap();

    for (Element entityElem : annotationsElem.getChildren("entity")) {
      String id = removeSingleChildText(entityElem, "id", null);
      Element spanElem = removeSingleChild(entityElem, "span", id);
      String type = removeSingleChildText(entityElem, "type", id);
      Element propertiesElem = removeSingleChild(entityElem, "properties", id);

      // UIMA doesn't support disjoint spans, so take the span enclosing
      // everything
      int begin = Integer.MAX_VALUE;
      int end = Integer.MIN_VALUE;
      for (String spanString : spanElem.getText().split(";")) {
        String[] beginEndStrings = spanString.split(",");
        if (beginEndStrings.length != 2) {
          // NOTE(review): error(...) is defined elsewhere; if it only reports and returns,
          // the parsing below can still throw on a piece with fewer than two parts -- confirm
          error("span not of the format 'number,number'", id);
        }
        int spanBegin = Integer.parseInt(beginEndStrings[0]);
        int spanEnd = Integer.parseInt(beginEndStrings[1]);
        if (spanBegin < begin) {
          begin = spanBegin;
        }
        if (spanEnd > end) {
          end = spanEnd;
        }
      }

      // the end offset is exclusive, so an annotation that reaches the last character of
      // the document has end == docLen and is still legal; only end > docLen is out of range
      if (begin < 0 || end > docLen) {
        error("Illegal begin or end boundary", id);
        continue;
      }

      Annotation annotation;
      // constant-first equals() so a missing <type> reaches the "unsupported" branch
      // instead of failing with a NullPointerException
      if ("EVENT".equals(type)) {
        String docTimeRel = removeSingleChildText(propertiesElem, "DocTimeRel", id);
        if (docTimeRel == null) {
          error("no docTimeRel, assuming OVERLAP", id);
          docTimeRel = "OVERLAP";
        }
        String polarity = removeSingleChildText(propertiesElem, "Polarity", id);
        EventMention eventMention = new EventMention(jCas, begin, end);
        Event event = new Event(jCas);
        EventProperties eventProperties = new EventProperties(jCas);
        eventProperties.setDocTimeRel(docTimeRel);
        // a missing Polarity (null) is reported like any other unexpected value
        if ("POS".equals(polarity)) {
          eventProperties.setPolarity(CONST.NE_POLARITY_NEGATION_ABSENT);
        } else if ("NEG".equals(polarity)) {
          eventProperties.setPolarity(CONST.NE_POLARITY_NEGATION_PRESENT);
        } else {
          error("polarity that was not POS or NEG", id);
        }
        eventProperties.addToIndexes();
        event.setConfidence(1.0f);
        event.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
        event.setProperties(eventProperties);
        event.setMentions(new FSArray(jCas, 1));
        event.setMentions(0, eventMention);
        event.addToIndexes();
        eventMention.setId(curEventId++);
        eventMention.setConfidence(1.0f);
        eventMention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION);
        eventMention.setEvent(event);
        eventMention.addToIndexes();
        annotation = eventMention;
      } else if ("TIMEX3".equals(type)) {
        String timeClass = removeSingleChildText(propertiesElem, "Class", id);
        TimeMention timeMention = new TimeMention(jCas, begin, end);
        timeMention.setId(curTimexId++);
        timeMention.setTimeClass(timeClass);
        timeMention.addToIndexes();
        annotation = timeMention;
      } else {
        throw new UnsupportedOperationException("unsupported entity type: " + type);
      }

      // match the annotation to its ID for later use
      idToAnnotation.put(id, annotation);

      // make sure all XML has been consumed
      removeSingleChild(entityElem, "parentsType", id);
      if (!propertiesElem.getChildren().isEmpty() || !entityElem.getChildren().isEmpty()) {
        List<String> children = Lists.newArrayList();
        for (Element child : propertiesElem.getChildren()) {
          children.add(child.getName());
        }
        for (Element child : entityElem.getChildren()) {
          children.add(child.getName());
        }
        error("unprocessed children " + children, id);
      }
    }

    for (Element relationElem : annotationsElem.getChildren("relation")) {
      String id = removeSingleChildText(relationElem, "id", null);
      String type = removeSingleChildText(relationElem, "type", id);
      Element propertiesElem = removeSingleChild(relationElem, "properties", id);

      if ("TLINK".equals(type)) {
        String sourceID = removeSingleChildText(propertiesElem, "Source", id);
        String targetID = removeSingleChildText(propertiesElem, "Target", id);
        String tlinkType = removeSingleChildText(propertiesElem, "Type", id);
        TemporalTextRelation relation = new TemporalTextRelation(jCas);
        relation.setId(curRelId++);
        addRelation(jCas, relation, sourceID, targetID, tlinkType, idToAnnotation, id);
        idToRelation.put(id, relation);
      } else if ("ALINK".equals(type)) {
        String sourceID = removeSingleChildText(propertiesElem, "Source", id);
        String targetID = removeSingleChildText(propertiesElem, "Target", id);
        String alinkType = removeSingleChildText(propertiesElem, "Type", id);
        AspectualTextRelation relation = new AspectualTextRelation(jCas);
        addRelation(jCas, relation, sourceID, targetID, alinkType, idToAnnotation, id);
        idToRelation.put(id, relation);
      } else if ("Question".equals(type)) {
        String questionText = removeSingleChildText(propertiesElem, "Question", id);
        String confidence = removeSingleChildText(propertiesElem, "Confidence", id);
        String difficulty = removeSingleChildText(propertiesElem, "Difficulty", id);
        String questionDescription =
            questionText + " - Confidence: " + confidence + " - Difficulty: " + difficulty;
        // collect the referenced ids first, then remove the Answer children so the
        // "unprocessed children" check below does not report them
        List<String> ids = new ArrayList<>();
        for (Element answer : propertiesElem.getChildren("Answer")) {
          ids.add(answer.getText());
        }
        propertiesElem.removeChildren("Answer");
        // answers may refer to relations that appear later in the file, so resolution is
        // deferred until every relation of this <annotations> element has been read
        questionRelations.put(questionDescription, ids);
      } else {
        throw new UnsupportedOperationException("unsupported relation type: " + type);
      }

      // make sure all XML has been consumed
      removeSingleChild(relationElem, "parentsType", id);
      if (!propertiesElem.getChildren().isEmpty() || !relationElem.getChildren().isEmpty()) {
        List<String> children = Lists.newArrayList();
        for (Element child : propertiesElem.getChildren()) {
          children.add(child.getName());
        }
        for (Element child : relationElem.getChildren()) {
          children.add(child.getName());
        }
        error("unprocessed children " + children, id);
      }
    }

    // After reading in all the relations we can create the Question annotations
    for (Map.Entry<String, List<String>> question : questionRelations.entrySet()) {
      CollectionTextRelation qaRel = new CollectionTextRelation(jCas);
      qaRel.setCategory(question.getKey());
      List<TOP> answerList = new ArrayList<>();
      for (String answerId : question.getValue()) {
        // an answer id may name either an entity or a relation
        TOP answer = idToAnnotation.get(answerId);
        if (answer == null) {
          answer = idToRelation.get(answerId);
        }
        if (answer == null) {
          // e.g. the referenced entity was skipped for an illegal span; do not put a
          // null member into the list
          LOGGER.error("cannot find answer for id: " + answerId);
          continue;
        }
        answerList.add(answer);
      }
      qaRel.setMembers(ListFactory.buildList(jCas, answerList));
      qaRel.addToIndexes();
    }
  }
}