in opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java [171:230]
/**
 * Normalizes the covered text of every {@code mNameType} annotation in the given CAS and
 * stores the result in {@code mStructureFeature}.
 *
 * <p>For each annotation the covered text is first replaced by its entry in
 * {@code mLookupDictionary}, if a dictionary is configured and contains one. The resulting
 * text is then written to the feature according to the feature's range type:
 * <ul>
 *   <li>string range: the text is stored as is;</li>
 *   <li>byte, short, integer, long, float or double range: the text is parsed with
 *       {@link NumberUtil#parse} using the document language and the parsed number is
 *       converted to the range type;</li>
 *   <li>any other range: the annotation is left unchanged.</li>
 * </ul>
 *
 * <p>If the document language is not supported by {@link NumberUtil}, nothing is normalized;
 * this is reported once at INFO level. Text that cannot be parsed as a number is reported at
 * WARN level and the affected annotation is skipped.
 *
 * @param tcas the CAS whose name annotations are normalized in place
 */
public void process(CAS tcas) {
  // The document language is a property of the CAS, not of a single annotation, so it is
  // checked once up front instead of once per annotation. This also avoids emitting the
  // same "unsupported language" message for every annotation in the index.
  String language = tcas.getDocumentLanguage();
  if (!NumberUtil.isLanguageSupported(language)) {
    if (mLogger.isLoggable(Level.INFO)) {
      mLogger.log(Level.INFO, "Unsupported language: " + language);
    }
    return;
  }

  // The range type of the target feature is the same for every annotation as well.
  String rangeTypeName = mStructureFeature.getRange().getName();
  boolean isStringRange = CAS.TYPE_NAME_STRING.equals(rangeTypeName);

  FSIndex<AnnotationFS> nameIndex = tcas.getAnnotationIndex(mNameType);
  for (AnnotationFS nameAnnotation : nameIndex) {
    String text = nameAnnotation.getCoveredText();

    // if possible replace text with normalization from dictionary
    if (mLookupDictionary != null) {
      String normalizedText = mLookupDictionary.get(new StringList(text));
      if (normalizedText != null) {
        text = normalizedText;
      }
    }

    if (isStringRange) {
      nameAnnotation.setStringValue(mStructureFeature, text);
      continue;
    }

    // Every remaining range type handled below is numeric, so parse first.
    Number number;
    try {
      number = NumberUtil.parse(text, language);
    } catch (ParseException e) {
      if (mLogger.isLoggable(Level.WARN)) {
        mLogger.log(Level.WARN, "Invalid number format: " + text);
      }
      // Best effort: leave this annotation as it is and carry on with the next one.
      continue;
    }

    // The Number conversions below narrow the parsed value to the range type; values
    // outside that type's range are truncated or rounded as defined by java.lang.Number.
    if (CAS.TYPE_NAME_BYTE.equals(rangeTypeName)) {
      nameAnnotation.setByteValue(mStructureFeature, number.byteValue());
    } else if (CAS.TYPE_NAME_SHORT.equals(rangeTypeName)) {
      nameAnnotation.setShortValue(mStructureFeature, number.shortValue());
    } else if (CAS.TYPE_NAME_INTEGER.equals(rangeTypeName)) {
      nameAnnotation.setIntValue(mStructureFeature, number.intValue());
    } else if (CAS.TYPE_NAME_LONG.equals(rangeTypeName)) {
      nameAnnotation.setLongValue(mStructureFeature, number.longValue());
    } else if (CAS.TYPE_NAME_FLOAT.equals(rangeTypeName)) {
      nameAnnotation.setFloatValue(mStructureFeature, number.floatValue());
    } else if (CAS.TYPE_NAME_DOUBLE.equals(rangeTypeName)) {
      nameAnnotation.setDoubleValue(mStructureFeature, number.doubleValue());
    }
  }
}