public void train()

in ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java [590:770]


  /**
   * Runs the first-pass training pipeline over {@code collectionReader} and then calls
   * {@code trainAndPackage} once per entry of {@code annotationTypes}.
   *
   * <p>The pipeline consists of the gold-copier and assertion-clearer annotators, the alternate
   * cue phrase annotator, and one annotator per assertion attribute that is not switched off via
   * the corresponding {@code options.ignoreXxx} flag. Each ClearTK-based attribute annotator is
   * configured to write its data into a sub-directory of {@code directory} named after the
   * attribute ({@code polarity}, {@code conditional}, {@code uncertainty}, {@code subject},
   * {@code generic}, {@code historyOf}).
   *
   * <p>The method returns immediately, without doing anything, when {@code options.noCleartk} is
   * set.
   *
   * @param collectionReader reader supplying the documents the pipeline is run on
   * @param directory parent directory under which the per-attribute directories live
   * @throws Exception if building or running the pipeline, or {@code trainAndPackage}, fails
   */
  public void train(CollectionReader collectionReader, File directory) throws Exception {
    if (options.noCleartk) {
      return;
    }

    AggregateBuilder builder = new AggregateBuilder();

    // Gold-copier and assertion-clearer annotators go in ahead of the attribute annotators.
    builder.add(AnalysisEngineFactory.createEngineDescription(
        ReferenceIdentifiedAnnotationsSystemToGoldCopier.class));
    builder.add(AnalysisEngineFactory.createEngineDescription(
        ReferenceSupportingAnnotationsSystemToGoldCopier.class));
    builder.add(AnalysisEngineFactory.createEngineDescription(
        ReferenceAnnotationsSystemAssertionClearer.class));

    // Feature selection: an unset threshold option is passed on to the annotators as 0.
    Float featureSelectionThreshold = options.featureSelectionThreshold;
    Class<? extends DataWriter<String>> dataWriterClassFirstPass = getDataWriterClass();
    if (options.featureSelectionThreshold == null) {
      featureSelectionThreshold = 0f;
    }

    builder.add(AnalysisEngineFactory.createEngineDescription(AlternateCuePhraseAnnotator.class));

    if (!options.ignorePolarity) {
      AnalysisEngineDescription polarityAnnotator;
      if (options.useYtexNegation) {
        // The YTEX descriptor is added as-is; it receives none of the training parameters.
        polarityAnnotator = AnalysisEngineFactory.createEngineDescription(YTEX_NEGATION_DESCRIPTOR);
      } else {
        if (options.feda) {
          polarityAnnotator =
              AnalysisEngineFactory.createEngineDescription(PolarityFedaCleartkAnalysisEngine.class);
          ConfigurationParameterFactory.addConfigurationParameters(
              polarityAnnotator,
              AssertionCleartkAnalysisEngine.FILE_TO_DOMAIN_MAP,
              options.trainDirectory);
        } else {
          // default: cleartk-based polarity, no domain adaptation
          polarityAnnotator =
              AnalysisEngineFactory.createEngineDescription(PolarityCleartkAnalysisEngine.class);
        }
        File polarityDirectory = new File(directory, "polarity");
        // Polarity is the only attribute that also gets the data-portion parameter.
        addFirstPassTrainingParameters(
            polarityAnnotator,
            polarityDirectory,
            PolarityCleartkAnalysisEngine.createFeatureSelectionURI(polarityDirectory),
            dataWriterClassFirstPass,
            featureSelectionThreshold,
            AssertionCleartkAnalysisEngine.PARAM_FEATURE_CONFIG,
            options.featConfig,
            AssertionCleartkAnalysisEngine.PARAM_PORTION_OF_DATA_TO_USE,
            (float) options.portionOfDataToUse);
      }
      builder.add(polarityAnnotator);
    }

    if (!options.ignoreConditional) {
      AnalysisEngineDescription conditionalAnnotator =
          AnalysisEngineFactory.createEngineDescription(ConditionalCleartkAnalysisEngine.class);
      File conditionalDirectory = new File(directory, "conditional");
      addFirstPassTrainingParameters(
          conditionalAnnotator,
          conditionalDirectory,
          ConditionalCleartkAnalysisEngine.createFeatureSelectionURI(conditionalDirectory),
          dataWriterClassFirstPass,
          featureSelectionThreshold);
      builder.add(conditionalAnnotator);
    }

    if (!options.ignoreUncertainty) {
      AnalysisEngineDescription uncertaintyAnnotator =
          AnalysisEngineFactory.createEngineDescription(UncertaintyCleartkAnalysisEngine.class);
      File uncertaintyDirectory = new File(directory, "uncertainty");
      addFirstPassTrainingParameters(
          uncertaintyAnnotator,
          uncertaintyDirectory,
          UncertaintyCleartkAnalysisEngine.createFeatureSelectionURI(uncertaintyDirectory),
          dataWriterClassFirstPass,
          featureSelectionThreshold,
          AssertionCleartkAnalysisEngine.PARAM_FEATURE_CONFIG,
          options.featConfig);
      builder.add(uncertaintyAnnotator);
    }

    if (!options.ignoreSubject) {
      AnalysisEngineDescription subjectAnnotator =
          AnalysisEngineFactory.createEngineDescription(SubjectCleartkAnalysisEngine.class);
      File subjectDirectory = new File(directory, "subject");
      addFirstPassTrainingParameters(
          subjectAnnotator,
          subjectDirectory,
          SubjectCleartkAnalysisEngine.createFeatureSelectionURI(subjectDirectory),
          dataWriterClassFirstPass,
          featureSelectionThreshold);
      builder.add(subjectAnnotator);
    }

    if (!options.ignoreGeneric) {
      AnalysisEngineDescription genericAnnotator =
          AnalysisEngineFactory.createEngineDescription(GenericCleartkAnalysisEngine.class);
      File genericDirectory = new File(directory, "generic");
      addFirstPassTrainingParameters(
          genericAnnotator,
          genericDirectory,
          GenericCleartkAnalysisEngine.createFeatureSelectionURI(genericDirectory),
          dataWriterClassFirstPass,
          featureSelectionThreshold);
      builder.add(genericAnnotator);
    }

    // 2/20/13 srh adding
    if (!options.ignoreHistory) {
      AnalysisEngineDescription historyAnnotator =
          AnalysisEngineFactory.createEngineDescription(HistoryCleartkAnalysisEngine.class);
      File historyDirectory = new File(directory, "historyOf");
      addFirstPassTrainingParameters(
          historyAnnotator,
          historyDirectory,
          HistoryCleartkAnalysisEngine.createFeatureSelectionURI(historyDirectory),
          dataWriterClassFirstPass,
          featureSelectionThreshold);
      builder.add(historyAnnotator);
    }

    SimplePipeline.runPipeline(collectionReader, builder.createAggregateDescription());

    for (String currentAssertionAttribute : annotationTypes) {
      File currentDirectory = new File(directory, currentAssertionAttribute);
      trainAndPackage(currentAssertionAttribute, currentDirectory, trainingArguments);
    }
  }

  /**
   * Adds the configuration parameters shared by all attribute annotators in {@code train}, followed
   * by any attribute-specific extras, in a single {@code addConfigurationParameters} call.
   *
   * <p>The shared name/value pairs are, in order: gold view name, data writer class, output
   * directory path, feature selection URI and feature selection threshold.
   *
   * @param annotator description to configure
   * @param outputDirectory directory whose path is passed as the data writer output directory
   * @param featureSelectionUri value for the feature selection URI parameter; typed as
   *     {@code Object} because it is only forwarded as a configuration value
   * @param dataWriterClass value for the data writer class parameter
   * @param featureSelectionThreshold value for the feature selection threshold parameter
   * @param extraParameters additional alternating name/value entries appended after the shared
   *     ones; may be empty
   */
  private static void addFirstPassTrainingParameters(
      AnalysisEngineDescription annotator,
      File outputDirectory,
      Object featureSelectionUri,
      Class<? extends DataWriter<String>> dataWriterClass,
      Float featureSelectionThreshold,
      Object... extraParameters) {
    Object[] sharedParameters = {
      AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
      AssertionEvaluation.GOLD_VIEW_NAME,
      DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
      dataWriterClass,
      DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
      outputDirectory.getPath(),
      AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_URI,
      featureSelectionUri,
      AssertionCleartkAnalysisEngine.PARAM_FEATURE_SELECTION_THRESHOLD,
      featureSelectionThreshold
    };

    // Concatenate into one array so the annotator is configured by exactly one call.
    Object[] allParameters = new Object[sharedParameters.length + extraParameters.length];
    System.arraycopy(sharedParameters, 0, allParameters, 0, sharedParameters.length);
    System.arraycopy(
        extraParameters, 0, allParameters, sharedParameters.length, extraParameters.length);

    ConfigurationParameterFactory.addConfigurationParameters(annotator, allParameters);
  }