in ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java [590:770]
/**
 * Runs the training-data-writing pipeline for the enabled assertion attributes and then
 * trains and packages one model per entry of {@code annotationTypes}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Return immediately when {@code options.noCleartk} is set.</li>
 *   <li>Add {@code ReferenceIdentifiedAnnotationsSystemToGoldCopier},
 *       {@code ReferenceSupportingAnnotationsSystemToGoldCopier},
 *       {@code ReferenceAnnotationsSystemAssertionClearer} and
 *       {@code AlternateCuePhraseAnnotator} to the aggregate.</li>
 *   <li>For each attribute that is not disabled through an {@code options.ignore*} flag, add
 *       its annotator configured to write training data under
 *       {@code directory/<attribute>}.</li>
 *   <li>Run the aggregate over {@code collectionReader}.</li>
 *   <li>Call {@code trainAndPackage} for every name in {@code annotationTypes}, using
 *       {@code directory/<name>} and {@code trainingArguments}.</li>
 * </ol>
 *
 * <p>NOTE(review): the sub-directory names used while writing ("polarity", "conditional",
 * "uncertainty", "subject", "generic", "historyOf") presumably have to match the names held
 * in {@code annotationTypes}; confirm where that collection is populated.
 *
 * @param collectionReader reader supplying the training documents
 * @param directory        root directory; one sub-directory per attribute is used beneath it
 * @throws Exception propagated unchanged from pipeline construction, pipeline execution or
 *                   {@code trainAndPackage}
 */
public void train(CollectionReader collectionReader, File directory) throws Exception {
    if (options.noCleartk) {
        return;
    }

    AggregateBuilder builder = new AggregateBuilder();
    builder.add(AnalysisEngineFactory.createEngineDescription(ReferenceIdentifiedAnnotationsSystemToGoldCopier.class));
    builder.add(AnalysisEngineFactory.createEngineDescription(ReferenceSupportingAnnotationsSystemToGoldCopier.class));
    builder.add(AnalysisEngineFactory.createEngineDescription(ReferenceAnnotationsSystemAssertionClearer.class));

    // Feature selection parameters: an unset threshold falls back to 0.
    Float featureSelectionThreshold = options.featureSelectionThreshold;
    if (featureSelectionThreshold == null) {
        featureSelectionThreshold = 0f;
    }
    Class<? extends DataWriter<String>> dataWriterClassFirstPass = getDataWriterClass();

    // Add each assertion analysis engine to the pipeline.
    builder.add(AnalysisEngineFactory.createEngineDescription(AlternateCuePhraseAnnotator.class));

    if (!options.ignorePolarity) {
        AnalysisEngineDescription polarityAnnotator;
        if (options.useYtexNegation) {
            // The YTEX negation descriptor is added as-is; no training parameters are set on it.
            polarityAnnotator = AnalysisEngineFactory.createEngineDescription(YTEX_NEGATION_DESCRIPTOR);
        } else {
            if (options.feda) {
                polarityAnnotator = AnalysisEngineFactory.createEngineDescription(PolarityFedaCleartkAnalysisEngine.class);
                ConfigurationParameterFactory.addConfigurationParameters(
                        polarityAnnotator,
                        AssertionCleartkAnalysisEngine.FILE_TO_DOMAIN_MAP,
                        options.trainDirectory);
            } else {
                // Default: cleartk-based polarity, no domain adaptation.
                polarityAnnotator = AnalysisEngineFactory.createEngineDescription(PolarityCleartkAnalysisEngine.class);
            }
            File polarityDirectory = new File(directory, "polarity");
            addFirstPassTrainingParameters(
                    polarityAnnotator,
                    dataWriterClassFirstPass,
                    polarityDirectory,
                    PolarityCleartkAnalysisEngine.createFeatureSelectionURI(polarityDirectory),
                    featureSelectionThreshold,
                    AssertionCleartkAnalysisEngine.PARAM_FEATURE_CONFIG,
                    options.featConfig,
                    AssertionCleartkAnalysisEngine.PARAM_PORTION_OF_DATA_TO_USE,
                    (float) options.portionOfDataToUse);
        }
        builder.add(polarityAnnotator);
    }

    if (!options.ignoreConditional) {
        AnalysisEngineDescription conditionalAnnotator = AnalysisEngineFactory.createEngineDescription(ConditionalCleartkAnalysisEngine.class);
        File conditionalDirectory = new File(directory, "conditional");
        addFirstPassTrainingParameters(
                conditionalAnnotator,
                dataWriterClassFirstPass,
                conditionalDirectory,
                ConditionalCleartkAnalysisEngine.createFeatureSelectionURI(conditionalDirectory),
                featureSelectionThreshold);
        builder.add(conditionalAnnotator);
    }

    if (!options.ignoreUncertainty) {
        AnalysisEngineDescription uncertaintyAnnotator = AnalysisEngineFactory.createEngineDescription(UncertaintyCleartkAnalysisEngine.class);
        File uncertaintyDirectory = new File(directory, "uncertainty");
        addFirstPassTrainingParameters(
                uncertaintyAnnotator,
                dataWriterClassFirstPass,
                uncertaintyDirectory,
                UncertaintyCleartkAnalysisEngine.createFeatureSelectionURI(uncertaintyDirectory),
                featureSelectionThreshold,
                AssertionCleartkAnalysisEngine.PARAM_FEATURE_CONFIG,
                options.featConfig);
        builder.add(uncertaintyAnnotator);
    }

    if (!options.ignoreSubject) {
        AnalysisEngineDescription subjectAnnotator = AnalysisEngineFactory.createEngineDescription(SubjectCleartkAnalysisEngine.class);
        File subjectDirectory = new File(directory, "subject");
        addFirstPassTrainingParameters(
                subjectAnnotator,
                dataWriterClassFirstPass,
                subjectDirectory,
                SubjectCleartkAnalysisEngine.createFeatureSelectionURI(subjectDirectory),
                featureSelectionThreshold);
        builder.add(subjectAnnotator);
    }

    if (!options.ignoreGeneric) {
        AnalysisEngineDescription genericAnnotator = AnalysisEngineFactory.createEngineDescription(GenericCleartkAnalysisEngine.class);
        File genericDirectory = new File(directory, "generic");
        addFirstPassTrainingParameters(
                genericAnnotator,
                dataWriterClassFirstPass,
                genericDirectory,
                GenericCleartkAnalysisEngine.createFeatureSelectionURI(genericDirectory),
                featureSelectionThreshold);
        builder.add(genericAnnotator);
    }

    if (!options.ignoreHistory) {
        AnalysisEngineDescription historyAnnotator = AnalysisEngineFactory.createEngineDescription(HistoryCleartkAnalysisEngine.class);
        File historyDirectory = new File(directory, "historyOf");
        addFirstPassTrainingParameters(
                historyAnnotator,
                dataWriterClassFirstPass,
                historyDirectory,
                HistoryCleartkAnalysisEngine.createFeatureSelectionURI(historyDirectory),
                featureSelectionThreshold);
        builder.add(historyAnnotator);
    }

    // First pass: run every configured annotator over the training documents.
    SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());

    // Second pass: train and package a model from each attribute's sub-directory.
    for (String currentAssertionAttribute : annotationTypes) {
        File currentDirectory = new File(directory, currentAssertionAttribute);
        trainAndPackage(currentAssertionAttribute, currentDirectory, trainingArguments);
    }
}

/**
 * Sets the training parameters shared by every attribute annotator in {@link #train}, followed
 * by any attribute-specific extras, using a single {@code addConfigurationParameters} call.
 *
 * <p>The shared name/value pairs are, in order: gold view name, data writer class, output
 * directory path, feature selection URI and feature selection threshold.
 *
 * @param annotator                 description to configure
 * @param dataWriterClass           value for {@code DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME}
 * @param outputDirectory           directory whose path is used as the writer output directory
 * @param featureSelectionUri       value for {@code AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI}
 * @param featureSelectionThreshold value for {@code AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD}
 * @param extraParameters           additional alternating name/value entries appended after the shared ones
 * @throws Exception declared to mirror {@code train}, so anything raised by the factory call
 *                   propagates unchanged
 */
private static void addFirstPassTrainingParameters(
        AnalysisEngineDescription annotator,
        Class<?> dataWriterClass,
        File outputDirectory,
        Object featureSelectionUri,
        Float featureSelectionThreshold,
        Object... extraParameters) throws Exception {
    Object[] sharedParameters = {
        AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
        AssertionEvaluation.GOLD_VIEW_NAME,
        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
        dataWriterClass,
        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
        outputDirectory.getPath(),
        AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
        featureSelectionUri,
        AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
        featureSelectionThreshold
    };

    // Concatenate shared + extra entries so the factory receives one flat name/value list.
    Object[] allParameters = new Object[sharedParameters.length + extraParameters.length];
    System.arraycopy(sharedParameters, 0, allParameters, 0, sharedParameters.length);
    System.arraycopy(extraParameters, 0, allParameters, sharedParameters.length, extraParameters.length);

    ConfigurationParameterFactory.addConfigurationParameters(annotator, allParameters);
}