in OpenCalaisAnnotator/src/main/java/org/apache/uima/annotator/calais/OpenCalaisAnnotator.java [120:213]
/**
 * Sends the document text of the given CAS to the Calais service, parses the RDF XML that is
 * embedded in the service response and creates one annotation for every returned element whose
 * type URL has an entry in {@code typeMapping}.
 *
 * <p>The annotation offsets reported by the service are shifted by the offset collected by the
 * SAX handler and are then corrected for every occurrence of an entry of {@code charsToReplace}
 * in the document text that lies before the annotation start.</p>
 *
 * <p>If the CAS has no document text, nothing is sent to the service and no annotation is
 * created.</p>
 *
 * @param aCas
 *          the CAS whose document text is analyzed and that receives the created annotations
 * @throws AnalysisEngineProcessException
 *           if the service cannot be read, the response cannot be parsed, or the XML parser
 *           cannot be created
 */
public void process(CAS aCas) throws AnalysisEngineProcessException {
  try {
    String text = aCas.getDocumentText();
    if (text == null) {
      // no document text: there is nothing to send and nothing to annotate
      return;
    }

    // create the DOM parser before the connection is opened so that a parser configuration
    // failure cannot leave the service stream open
    DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();

    // open connection and send data
    InputStream serviceInputStream = callServiceOnText(text);

    // result is an XML that contains the RDF XML result
    // first get the RDF XML result out of the returned XML
    Document feedDoc;
    BufferedInputStream in = new BufferedInputStream(serviceInputStream);
    try {
      feedDoc = docBuilder.parse(in);
    } finally {
      // always release the service stream, also when parsing fails
      in.close();
    }
    String rdfXmlContent = feedDoc.getDocumentElement().getTextContent();

    // getXmlEncoding() returns null when the response carries no encoding declaration;
    // fall back to UTF-8, which is also what an XML parser assumes for undeclared content
    String encoding = feedDoc.getXmlEncoding();
    if (encoding == null) {
      encoding = "UTF-8";
    }

    // create new InputStream for the RDF XML content
    BufferedInputStream bufByteIn = new BufferedInputStream(new ByteArrayInputStream(
            rdfXmlContent.getBytes(encoding)));

    // create SAX handler
    HashMap<String, DescriptionElement> elements = new HashMap<String, DescriptionElement>();
    ArrayList<DescriptionElement> subjectMap = new ArrayList<DescriptionElement>();
    Offset offset = new Offset();
    RDFSaxHandler saxHandler = new RDFSaxHandler(elements, subjectMap, offset);

    // parse RDF XML content returned by the calais service
    this.saxParser.parse(bufByteIn, saxHandler);

    // collect every position in the document text where an entry of charsToReplace occurs;
    // presumably these characters are altered/removed before the text is sent to the service,
    // which is why the returned offsets have to be corrected - verify against callServiceOnText
    ArrayList<Integer> positionsList = new ArrayList<Integer>();
    for (int i = 0; i < this.charsToReplace.length; i++) {
      int index = text.indexOf(this.charsToReplace[i]);
      while (index > -1) {
        positionsList.add(index);
        index = text.indexOf(this.charsToReplace[i], index + 1);
      }
    }
    Integer[] positions = positionsList.toArray(new Integer[] {});
    Arrays.sort(positions);

    // analyze entities
    for (DescriptionElement element : subjectMap) {
      // retrieve subject URL, the subject URL must be equal to an about URL in the elements
      // map to get the type of the current element
      DescriptionElement typeElement = elements.get(element.getSubjectURL());
      if (typeElement == null) {
        // the subject is not described in the response, so its type is unknown: skip it
        continue;
      }

      // get current CAS type for the type URL; without a mapping no annotation is created
      Type currentType = this.typeMapping.get(typeElement.getTypeURL());
      if (currentType == null) {
        continue;
      }

      // compute the begin offset relative to the offset collected by the SAX handler
      int begin = 0;
      if (element.getOffset() > 0) {
        begin = element.getOffset() - offset.getOffset() - 1;
      }

      // make begin offset correction: shift by one for every collected position before begin
      for (int i = 0; i < positions.length; i++) {
        if (positions[i] < begin) {
          begin++;
        } else {
          // positions are sorted ascending, so no later position can be smaller than begin
          break;
        }
      }
      int end = begin + element.getLength();

      // create annotation, empty or negative spans are ignored
      if (end - begin > 0) {
        AnnotationFS annotFs = aCas.createAnnotation(currentType, begin, end);
        annotFs.setStringValue(this.calaisTypeFeat, element.getTypeURL().intern());
        aCas.addFsToIndexes(annotFs);
      }
    }
  } catch (IOException ex) {
    throw new AnalysisEngineProcessException(ex);
  } catch (SAXException ex) {
    throw new AnalysisEngineProcessException(ex);
  } catch (ParserConfigurationException ex) {
    throw new AnalysisEngineProcessException(ex);
  }
}