in ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource_impl.java [780:915]
/**
 * SAX callback invoked at the start of every element of the dictionary XML.
 *
 * <p>Two element names are handled; any other element is ignored:
 * <ul>
 * <li>{@code token_elem}: a fresh set of entry properties is obtained from the properties root
 * and filled with the element's attributes (entity-converted). These become the current
 * token-level properties in {@code props}.</li>
 * <li>{@code variant_elem}: the value of the {@code key_attribute} attribute is run through the
 * tokenizing adaptor, the resulting tokens accepted by {@code tokenFilter} are normalized, and
 * the token sequence is stored in {@code dict}. All other attributes of the variant are set on
 * a per-variant properties object built from the current token-level {@code props}.</li>
 * </ul>
 *
 * <p>A variant is skipped (nothing is stored and it is not counted) when it has no key
 * attribute or when none of its tokens is accepted by the token filter.
 *
 * @param uri namespace URI of the element (unused)
 * @param local local name of the element (unused)
 * @param raw qualified element name; compared against the token and variant element names
 * @param attrs attributes of the element; when {@code null} the element is ignored
 * @throws SAXException declared by the SAX handler contract
 */
public void startElement(String uri, String local, String raw, Attributes attrs)
        throws SAXException {
    if (raw.equals(token_elem)) { // starting new token entry
        if (attrs != null) {
            props = getPropertiesRoot().newEntryProperties();
            int attrCount = attrs.getLength();
            for (int i = 0; i < attrCount; i++) {
                props.setProperty(attrs.getQName(i), convertEntities(attrs.getValue(i)));
            }
        }
        return;
    }

    if (!raw.equals(variant_elem) || attrs == null) {
        return; // not a variant of the current token, or nothing to read
    }

    ArrayList<String> tokens = new ArrayList<String>();
    // Per-variant properties, constructed from the token-level props; attributes found on the
    // variant itself (e.g. its own POS info) are set on this object rather than on props.
    EntryProperties variantProperties = new EntryProperties(props);

    int attrCount = attrs.getLength();
    for (int i = 0; i < attrCount; i++) {
        if (attrs.getQName(i).equals(key_attribute)) { // key attribute?
            // Tokenize the key text; the adaptor run is expected to leave its tokens in 'result'.
            adaptor.runCPM(convertEntities(attrs.getValue(i)));

            // Keep only the tokens accepted by the filter. Previously, when no token was
            // accepted, the last (rejected) token was still used as the entry key.
            Iterator<DictionaryToken> tokenIter = result.iterator();
            while (tokenIter.hasNext()) {
                DictionaryToken token = tokenIter.next();
                if (tokenFilter.isOK_Token(token, tokenNormalizer)) {
                    tokens.add(tokenNormalizer.normalize(token.getText()));
                }
            }
            if (tokens.isEmpty()) {
                return; // no usable token in this variant: nothing to add
            }
        } else {
            variantProperties.setProperty(attrs.getQName(i), convertEntities(attrs.getValue(i)));
        }
    }

    if (tokens.isEmpty()) {
        // Variant without a key attribute: avoids tokens.get(0) failing on an empty list below.
        return;
    }

    String[] elements = tokens.toArray(new String[tokens.size()]);
    String unsorted = null;
    if (sortElements) {
        // Remember the original token order before sorting the elements in place.
        unsorted = stringTogetherTokens(elements);
        Arrays.sort(elements);
    }

    // add to dictionary
    if (sortElements) {
        // With sorted elements the entry is registered under each of its tokens
        // (tokens still holds them in original order).
        for (int i = 0; i < tokens.size(); i++) {
            dict.putEntry(tokens.get(i), elements, unsorted, elements.length, variantProperties);
        }
    } else {
        // Otherwise the entry is registered only under its first token.
        dict.putEntry(tokens.get(0), elements, unsorted, elements.length, variantProperties);
    }

    term_cnt++;
    if ((term_cnt % 10000) == 0) {
        getLogger().logInfo("processed " + term_cnt + " entries");
    }
}