in ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaCutter.java [97:204]
/**
 * Creates (or reuses) the view named {@code outputViewName} and fills it with only those parts
 * of the input document text that are covered by annotations of the type configured in
 * {@code keep}. While building the reduced text, an offset map from positions in the original
 * text to positions in the reduced text is stored in {@code map} and finally handed to
 * {@code mapAnnotations} together with the input view and the output view name.
 * <p>
 * The input is the view named {@code inputViewName} if that field is set, otherwise the given
 * CAS itself. {@code keep} is first looked up as a type name in the type system and, if that
 * fails, compared against the short names of all types.
 * <p>
 * The method returns without producing output (after logging) if the input view has no document
 * text, if the output view cannot be accessed or created, or if the {@code keep} type is unknown.
 *
 * @param jcaz
 *          the CAS to process
 * @throws AnalysisEngineProcessException
 *           if the configured input view cannot be obtained
 */
public void process(JCas jcaz) throws AnalysisEngineProcessException {
  JCas jcas;
  try {
    if (inputViewName != null) {
      jcas = jcaz.getView(inputViewName);
    } else {
      jcas = jcaz;
    }
  } catch (CASException e1) {
    // wrap the CASException itself: its cause may be null, which would drop all information
    throw new AnalysisEngineProcessException(e1);
  }
  // init:
  String documentText = jcas.getDocumentText();
  if (documentText == null) {
    // without text there is nothing to cut; avoids a NullPointerException below
    getContext().getLogger().log(Level.WARNING,
            "view \"" + jcas.getViewName() + "\" has no document text, nothing to cut.");
    return;
  }
  // one extra slot so that the end offset of the document can be mapped as well
  map = new int[documentText.length() + 1];
  JCas modview = null;
  try {
    // check if view already exists:
    Iterator<JCas> viewIterator = jcas.getViewIterator();
    while (viewIterator.hasNext()) {
      JCas candidate = viewIterator.next();
      if (candidate.getViewName().equals(outputViewName)) {
        modview = candidate;
        getContext().getLogger().log(Level.WARNING,
                "view with name \"" + outputViewName + "\" already exists.");
        break;
      }
    }
    if (modview == null) {
      modview = jcas.createView(outputViewName);
    }
  } catch (CASException e) {
    // best effort as before (no exception to the caller), but report through the UIMA logger
    getContext().getLogger().log(Level.SEVERE,
            "could not access or create view \"" + outputViewName + "\".", e);
    return;
  }
  SortedSet<HtmlConverterPSpan> visibleSpansSoFar = new TreeSet<HtmlConverterPSpan>();
  // process
  TypeSystem typeSystem = jcas.getTypeSystem();
  Type keepType = typeSystem.getType(keep);
  if (keepType == null) {
    // fall back to the first type whose short name equals the configured value
    Iterator<Type> typeIterator = typeSystem.getTypeIterator();
    while (typeIterator.hasNext()) {
      Type type = typeIterator.next();
      if (type.getShortName().equals(keep)) {
        keepType = type;
        break;
      }
    }
    if (keepType == null) {
      getContext().getLogger().log(Level.WARNING, "Type \"" + keep + "\" not defined.");
      return;
    }
  }
  // every annotation of the keep type contributes one visible span
  AnnotationIndex<AnnotationFS> annotationIndex = jcas.getCas().getAnnotationIndex(keepType);
  for (AnnotationFS each : annotationIndex) {
    visibleSpansSoFar.add(new HtmlConverterPSpan(each.getBegin(), each.getEnd(), each
            .getCoveredText()));
  }
  // create new doc-text and the map from deletions and visible-text-spans:
  StringBuilder sbu = new StringBuilder(documentText.length());
  int originalOffsetI = 0;
  int outOffset = 0;
  for (HtmlConverterPSpan vis : visibleSpansSoFar) {
    final int begin = vis.getBegin();
    final int end = vis.getEnd();
    // map text before annotation (dropped text collapses onto the current output offset):
    while (originalOffsetI < begin) {
      map[originalOffsetI++] = outOffset;
    }
    // get and map text/replacement:
    String s = "";
    if (vis instanceof HtmlConverterPSpanReplacement) {
      // NOTE(review): this method only adds plain HtmlConverterPSpan instances to the set, so
      // this branch appears to be unreachable here - confirm before relying on it.
      // conversion/replacement:
      s = vis.getTxt();
      // asserts that s is shorter than the original source
      while (originalOffsetI < begin + s.length()) {
        map[originalOffsetI++] = outOffset++;
      }
      while (originalOffsetI < end) {
        map[originalOffsetI++] = outOffset;
      }
    } else {
      // simple annotation: originalOffsetI is >= begin at this point. It is greater than begin
      // if an earlier span overlaps or encloses this one; then only the part that has not been
      // emitted yet is appended, so that the output text and the offset map stay in sync.
      final int from = originalOffsetI;
      if (from < end) {
        s = documentText.substring(from, end);
        while (originalOffsetI < end) {
          map[originalOffsetI++] = outOffset++;
        }
      }
    }
    sbu.append(s);
  }
  // trailing text after the last visible span is dropped as well
  while (originalOffsetI < documentText.length()) {
    map[originalOffsetI++] = outOffset;
  }
  // NOTE(review): value kept from the original code; how the "+ 1" is interpreted depends on
  // mapAnnotations, which is not visible here.
  map[documentText.length()] = outOffset + 1; // handle doc end separately
  String modTxt = sbu.toString();
  modview.setDocumentText(modTxt);
  // copy annotations using the 'map':
  try {
    mapAnnotations(jcas, map, outputViewName);
  } catch (CASException e) {
    // best effort as before: the output view keeps its text, the failure is logged
    getContext().getLogger().log(Level.SEVERE,
            "could not map annotations to view \"" + outputViewName + "\".", e);
  }
}