in uimaj-tools/src/main/java/org/apache/uima/tools/docanalyzer/DocumentAnalyzer.java [1262:1450]
/**
 * Configures and starts one analysis run over the documents in {@code inputDir}.
 * <p>
 * Steps, in order: a {@code FileSystemCollectionReader} is created for the input directory; a
 * progress monitor is shown and this frame is disabled; the user's descriptor is parsed from
 * {@code aeSpecifierFile}; an aggregate with a fixed flow of (optional) {@code XmlDetagger},
 * the user's AE and an {@code XmiWriterCasConsumer} is built and handed to a newly created CPM;
 * a CAS covering all component type systems is created and the AE's declared output types are
 * stored in {@code currentTaeOutputTypes}; finally {@code mCPM.process(...)} is called.
 * <p>
 * Nothing is rethrown: any {@code Throwable} is reported through {@code displayError} and
 * followed by a call to {@code aborted()}.
 *
 * @param inputDir
 *          directory whose absolute path is passed to the collection reader
 * @param inputFileFormat
 *          value stored in the reader's {@code PARAM_XCAS} parameter
 * @param lenient
 *          stored as {@code "true"}/{@code "false"} in the reader's {@code PARAM_LENIENT}
 *          parameter; {@code null} is treated as {@code false}
 * @param outputDir
 *          directory whose absolute path is passed to the XMI writer
 * @param aeSpecifierFile
 *          XML descriptor of the user's component, used as the "UserAE" delegate
 * @param xmlTag
 *          if non-empty, an {@code XmlDetagger} configured with this tag is placed in front of
 *          the user's AE; {@code null} or empty disables detagging
 * @param language
 *          value stored in the reader's {@code PARAM_LANGUAGE} parameter
 * @param encoding
 *          value stored in the reader's {@code PARAM_ENCODING} parameter
 */
public void runProcessingThread(File inputDir, String inputFileFormat, Boolean lenient,
        File outputDir, File aeSpecifierFile, String xmlTag, String language, String encoding) {
  try {
    // create and configure collection reader that will read input docs
    CollectionReaderDescription collectionReaderDesc = FileSystemCollectionReader
            .getDescription();
    ConfigurationParameterSettings paramSettings = collectionReaderDesc.getMetaData()
            .getConfigurationParameterSettings();
    paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_INPUTDIR,
            inputDir.getAbsolutePath());
    paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_XCAS, inputFileFormat);
    // Boolean.TRUE.equals avoids an unboxing NullPointerException when lenient is null
    paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_LENIENT,
            Boolean.TRUE.equals(lenient) ? "true" : "false");
    paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_LANGUAGE, language);
    paramSettings.setParameterValue(FileSystemCollectionReader.PARAM_ENCODING, encoding);
    collectionReader = (FileSystemCollectionReader) UIMAFramework
            .produceCollectionReader(collectionReaderDesc);

    // show progress Monitor (two extra ticks are used for the set-up steps below)
    numDocs = collectionReader.getNumberOfDocuments();
    String progressMsg = " Processing " + numDocs + " Documents.";
    progressMonitor = new ProgressMonitor(DocumentAnalyzer.this, progressMsg, "", 0, numDocs + 2);
    String initial = "Initializing.... Please wait ";
    progressMonitor.setNote(initial);
    progressMonitor.setMillisToPopup(-1);
    progressMonitor.setMillisToDecideToPopup(-1);
    numDocsProcessed = 0;
    progressTimer.start();

    // set wait cursor
    setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
    // Disable frame while processing:
    setEnabled(false);

    // create CPM instance that will drive processing
    mCPM = UIMAFramework.newCollectionProcessingManager();

    // read AE descriptor from file; always release the descriptor stream, also when parsing
    // fails
    XMLInputSource in = new XMLInputSource(aeSpecifierFile);
    ResourceSpecifier aeSpecifier;
    try {
      aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
    } finally {
      in.close();
    }

    // create and configure CAS consumer that will write the output (in
    // XMI format)
    CasConsumerDescription casConsumerDesc = XmiWriterCasConsumer.getDescription();
    ConfigurationParameterSettings consumerParamSettings = casConsumerDesc.getMetaData()
            .getConfigurationParameterSettings();
    consumerParamSettings.setParameterValue(XmiWriterCasConsumer.PARAM_OUTPUTDIR,
            outputDir.getAbsolutePath());
    // declare uima.cas.TOP as an input so that ResultSpec on user's AE will be set to produce all
    // types
    casConsumerDesc.getCasConsumerMetaData().getCapabilities()[0].addInputType("uima.cas.TOP",
            true);

    // if XML tag was specified, also create XmlDetagger annotator that handles this
    AnalysisEngineDescription xmlDetaggerDesc = null;
    if (xmlTag != null && xmlTag.length() > 0) {
      xmlDetaggerDesc = XmlDetagger.getDescription();
      ConfigurationParameterSettings xmlDetaggerParamSettings = xmlDetaggerDesc.getMetaData()
              .getConfigurationParameterSettings();
      xmlDetaggerParamSettings.setParameterValue(XmlDetagger.PARAM_TEXT_TAG, xmlTag);
      usingXmlDetagger = true;
    } else {
      usingXmlDetagger = false;
    }

    // create an aggregate AE that includes the XmlDetagger (if needed), followed by
    // the user's AE descriptor, followed by the XMI Writer CAS Consumer, using fixed flow.
    // We use an aggregate AE here, rather than just adding the CAS Consumer to the CPE, so
    // that we can support the user's AE being a CAS Multiplier and we can specify sofa mappings.
    AnalysisEngineDescription aggDesc = UIMAFramework.getResourceSpecifierFactory()
            .createAnalysisEngineDescription();
    aggDesc.setPrimitive(false);
    aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("UserAE", aeSpecifier);
    aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("XmiWriter", casConsumerDesc);
    FixedFlow flow = UIMAFramework.getResourceSpecifierFactory().createFixedFlow();
    if (xmlDetaggerDesc != null) {
      aggDesc.getDelegateAnalysisEngineSpecifiersWithImports().put("XmlDetagger",
              xmlDetaggerDesc);
      flow.setFixedFlow(new String[] { "XmlDetagger", "UserAE", "XmiWriter" });

      // to run XmlDetagger we need sofa mappings
      // XmlDetagger's "xmlDocument" input sofa gets mapped to the default sofa
      SofaMapping sofaMapping1 = UIMAFramework.getResourceSpecifierFactory().createSofaMapping();
      sofaMapping1.setComponentKey("XmlDetagger");
      sofaMapping1.setComponentSofaName("xmlDocument");
      sofaMapping1.setAggregateSofaName(CAS.NAME_DEFAULT_SOFA);

      // for UserAE and XmiWriter, map default sofa to the "plainTextDocument" produced by the
      // XmlDetagger
      SofaMapping sofaMapping2 = UIMAFramework.getResourceSpecifierFactory().createSofaMapping();
      sofaMapping2.setComponentKey("UserAE");
      sofaMapping2.setAggregateSofaName("plainTextDocument");
      SofaMapping sofaMapping3 = UIMAFramework.getResourceSpecifierFactory().createSofaMapping();
      sofaMapping3.setComponentKey("XmiWriter");
      sofaMapping3.setAggregateSofaName("plainTextDocument");

      aggDesc.setSofaMappings(new SofaMapping[] { sofaMapping1, sofaMapping2, sofaMapping3 });
    } else {
      // no XML detagger needed in the aggregate in flow
      flow.setFixedFlow(new String[] { "UserAE", "XmiWriter" });
    }
    aggDesc.getAnalysisEngineMetaData().setName("DocumentAnalyzerAE");
    aggDesc.getAnalysisEngineMetaData().setFlowConstraints(flow);
    aggDesc.getAnalysisEngineMetaData().getOperationalProperties()
            .setMultipleDeploymentAllowed(false);
    progressMonitor.setProgress(1);

    // instantiate AE
    // keep this a local variable - so it doesn't hang on to the ae object
    // preventing gc (some ae objects are huge)
    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(aggDesc);
    mCPM.setAnalysisEngine(ae);
    progressMonitor.setProgress(2);

    // register callback listener
    mCPM.addStatusCallbackListener(DocumentAnalyzer.this);

    // create a CAS, including all types from all components, that
    // we'll later use for deserializing XCASes
    // (list intentionally left raw: it mixes descriptors and metadata objects)
    List descriptorList = new ArrayList();
    descriptorList.add(collectionReaderDesc);
    descriptorList.add(ae.getMetaData());
    descriptorList.add(casConsumerDesc);
    cas = CasCreationUtils.createCas(descriptorList);
    currentTypeSystem = cas.getTypeSystem();

    // save AE output types for later use in configuring viewer
    if (aeSpecifier instanceof AnalysisEngineDescription) {
      List<String> outputTypeList = new ArrayList<>();
      Capability[] capabilities = ((AnalysisEngineDescription) aeSpecifier)
              .getAnalysisEngineMetaData().getCapabilities();
      for (Capability capability : capabilities) {
        for (TypeOrFeature output : capability.getOutputs()) {
          if (!output.isType()) {
            continue; // features are not offered as viewable types
          }
          String typeName = output.getName();
          outputTypeList.add(typeName);
          // also add subsumed types
          // UIMA-2565 - Clash btw. cas.Type and Window.Type on JDK 7
          org.apache.uima.cas.Type t = currentTypeSystem.getType(typeName);
          if (t != null) {
            for (org.apache.uima.cas.Type subsumed : currentTypeSystem
                    .getProperlySubsumedTypes(t)) {
              outputTypeList.add(subsumed.getName());
            }
          }
        }
      }
      // always allow viewing document annotation
      outputTypeList.add("uima.tcas.DocumentAnnotation");
      currentTaeOutputTypes = outputTypeList.toArray(new String[0]);
    } else {
      // indicates all types should be selected
      currentTaeOutputTypes = null;
    }

    // Process (in separate thread)
    mCPM.process(collectionReader);

    // if the user has already clicked cancel, stop the CPM immediately.
    if (progressMonitor.isCanceled()) {
      mCPM.stop();
      progressMonitor.close();
    }
  } catch (Throwable t) {
    // special check for using XML detagger with remotes, which will generate an error
    // since sofa mappings aren't supported for remotes
    if (usingXmlDetagger && (t instanceof UIMAException) && ((UIMAException) t).hasMessageKey(
            ResourceInitializationException.SOFA_MAPPING_NOT_SUPPORTED_FOR_REMOTE)) {
      displayError(
              "The XML detagging feature is not supported for remote Analysis Engines or for Aggregates containing remotes. "
                      + "If you are running a remote Analysis Engine the \"XML Tag Containing Text\" field must be left blank.");
    } else {
      displayError(t);
    }
    aborted();
  }
}