ctakes-dependency-parser/desc/analysis_engine/ClearNLPSRLTokenizedInfPosAggregate.xml (516 lines of code) (raw):
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>false</primitive>
<delegateAnalysisEngineSpecifiers>
<delegateAnalysisEngine key="ClearNLPDependencyParserAE">
  <!--
    Primitive delegate wrapping ClearNLPDependencyParserAE.
    Its capability block declares BaseToken features as inputs and
    declares no outputs.
  -->
  <analysisEngineDescription>
    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
    <primitive>true</primitive>
    <annotatorImplementationName>org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE</annotatorImplementationName>
    <analysisEngineMetaData>
      <name>ClearNLPDependencyParserAE</name>
      <description>Descriptor automatically generated by uimaFIT</description>
      <version>unknown</version>
      <vendor>org.apache.ctakes.dependency.parser.ae</vendor>
      <configurationParameters>
        <!-- Optional; no value is set below, so the default model described here applies. -->
        <configurationParameter>
          <name>ParserModelFileName</name>
          <description>This parameter provides the file name of the dependency parser model required by the factory method provided by ClearNLPUtil. If not specified, this analysis engine will use a default model from the resources directory</description>
          <type>String</type>
          <multiValued>false</multiValued>
          <mandatory>false</mandatory>
        </configurationParameter>
        <!-- Mandatory; a value is supplied in the settings section below. -->
        <configurationParameter>
          <name>ParserAlgorithmName</name>
          <description>This parameter provides the algorithm name used by the dependency parser that is required by the factory method provided by ClearNLPUtil. If in doubt, do not change from the default value.</description>
          <type>String</type>
          <multiValued>false</multiValued>
          <mandatory>true</mandatory>
        </configurationParameter>
        <!-- The fallback lemma source is the normalizedForm feature listed in the inputs below. -->
        <configurationParameter>
          <name>UseLemmatizer</name>
          <description>If true, use the default ClearNLP lemmatizer, otherwise use lemmas from the BaseToken normalizedForm feature</description>
          <type>Boolean</type>
          <multiValued>false</multiValued>
          <mandatory>false</mandatory>
        </configurationParameter>
      </configurationParameters>
      <configurationParameterSettings>
        <nameValuePair>
          <name>ParserAlgorithmName</name>
          <value>
            <string>shift-pop</string>
          </value>
        </nameValuePair>
        <nameValuePair>
          <name>UseLemmatizer</name>
          <value>
            <boolean>true</boolean>
          </value>
        </nameValuePair>
      </configurationParameterSettings>
      <typeSystemDescription>
        <imports>
          <import name="org.apache.ctakes.typesystem.types.TypeSystem"/>
        </imports>
      </typeSystemDescription>
      <fsIndexCollection/>
      <capabilities>
        <capability>
          <inputs>
            <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:partOfSpeech</feature>
            <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:normalizedForm</feature>
            <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:tokenNumber</feature>
            <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:end</feature>
            <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:begin</feature>
          </inputs>
          <outputs/>
          <languagesSupported/>
        </capability>
      </capabilities>
      <operationalProperties>
        <modifiesCas>true</modifiesCas>
        <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
        <outputsNewCASes>false</outputsNewCASes>
      </operationalProperties>
    </analysisEngineMetaData>
  </analysisEngineDescription>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="LVG Annotator">
  <!--
    Primitive delegate wrapping LvgBaseTokenAnnotator.
    Declares WordToken as both input and output, and binds the external
    resource LvgCmdApi to an lvg.properties file.
    Uses analysisEngineDescription (rather than the legacy taeDescription
    element) so that it matches the other delegates in this aggregate.
  -->
  <analysisEngineDescription>
    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
    <primitive>true</primitive>
    <annotatorImplementationName>org.apache.ctakes.lvg.ae.LvgBaseTokenAnnotator</annotatorImplementationName>
    <analysisEngineMetaData>
      <name>LVG Annotator</name>
      <description/>
      <version/>
      <vendor/>
      <configurationParameters>
        <configurationParameter>
          <name>UseSegments</name>
          <description>Flag whether to use segments or full doc text.</description>
          <type>Boolean</type>
          <multiValued>false</multiValued>
          <mandatory>false</mandatory>
        </configurationParameter>
        <configurationParameter>
          <name>SegmentsToSkip</name>
          <description>Segments to skip.</description>
          <type>String</type>
          <multiValued>true</multiValued>
          <mandatory>false</mandatory>
        </configurationParameter>
        <configurationParameter>
          <name>UseCmdCache</name>
          <description>Flag whether to use LVG cache.</description>
          <type>Boolean</type>
          <multiValued>false</multiValued>
          <mandatory>true</mandatory>
        </configurationParameter>
        <configurationParameter>
          <name>CmdCacheFileLocation</name>
          <description>Location of LVG cache file containing LvgCmdApi data.</description>
          <type>String</type>
          <multiValued>false</multiValued>
          <mandatory>true</mandatory>
        </configurationParameter>
        <configurationParameter>
          <name>CmdCacheFrequencyCutoff</name>
          <description>Cutoff frequency for items that get loaded into RAM from cache.</description>
          <type>Integer</type>
          <multiValued>false</multiValued>
          <mandatory>true</mandatory>
        </configurationParameter>
        <configurationParameter>
          <name>ExclusionSet</name>
          <description>Set of words that LVG should not run on.</description>
          <type>String</type>
          <multiValued>true</multiValued>
          <mandatory>false</mandatory>
        </configurationParameter>
        <!--
          No description is provided for this parameter. Its values below are
          pipe-separated pairs; presumably they map Xerox category names to
          Treebank part-of-speech tags (TODO confirm against the annotator).
        -->
        <configurationParameter>
          <name>XeroxTreebankMap</name>
          <type>String</type>
          <multiValued>true</multiValued>
          <mandatory>true</mandatory>
        </configurationParameter>
        <configurationParameter>
          <name>PostLemmas</name>
          <description>This parameter determines whether the feature lemmaEntries will be populated for word annotations.</description>
          <type>Boolean</type>
          <multiValued>false</multiValued>
          <mandatory>true</mandatory>
        </configurationParameter>
        <configurationParameter>
          <name>UseLemmaCache</name>
          <description>This parameter determines whether a cache will be used to improve performance of setting lemma entries.</description>
          <type>Boolean</type>
          <multiValued>false</multiValued>
          <mandatory>true</mandatory>
        </configurationParameter>
        <configurationParameter>
          <name>LemmaCacheFileLocation</name>
          <description>This parameter determines where the lemma cache is located.</description>
          <type>String</type>
          <multiValued>false</multiValued>
          <mandatory>true</mandatory>
        </configurationParameter>
        <configurationParameter>
          <name>LemmaCacheFrequencyCutoff</name>
          <description>This parameter sets a threshold for the frequency of a lemma to be loaded into the cache.</description>
          <type>Integer</type>
          <multiValued>false</multiValued>
          <mandatory>true</mandatory>
        </configurationParameter>
      </configurationParameters>
      <!--
        Both caches and lemma posting are switched off below. The cache file
        locations and cutoffs are still given values because those parameters
        are declared mandatory above.
      -->
      <configurationParameterSettings>
        <nameValuePair>
          <name>UseSegments</name>
          <value>
            <boolean>false</boolean>
          </value>
        </nameValuePair>
        <!-- The array holds a single empty string entry. -->
        <nameValuePair>
          <name>SegmentsToSkip</name>
          <value>
            <array>
              <string/>
            </array>
          </value>
        </nameValuePair>
        <nameValuePair>
          <name>UseCmdCache</name>
          <value>
            <boolean>false</boolean>
          </value>
        </nameValuePair>
        <nameValuePair>
          <name>CmdCacheFileLocation</name>
          <value>
            <string>org/apache/ctakes/lvg/2005_norm.voc</string>
          </value>
        </nameValuePair>
        <nameValuePair>
          <name>CmdCacheFrequencyCutoff</name>
          <value>
            <integer>20</integer>
          </value>
        </nameValuePair>
        <!-- Each excluded word is listed in lower-case and capitalized form. -->
        <nameValuePair>
          <name>ExclusionSet</name>
          <value>
            <array>
              <string>and</string>
              <string>And</string>
              <string>by</string>
              <string>By</string>
              <string>for</string>
              <string>For</string>
              <string>in</string>
              <string>In</string>
              <string>of</string>
              <string>Of</string>
              <string>on</string>
              <string>On</string>
              <string>the</string>
              <string>The</string>
              <string>to</string>
              <string>To</string>
              <string>with</string>
              <string>With</string>
            </array>
          </value>
        </nameValuePair>
        <nameValuePair>
          <name>XeroxTreebankMap</name>
          <value>
            <array>
              <string>adj|JJ</string>
              <string>adv|RB</string>
              <string>aux|AUX</string>
              <string>compl|CS</string>
              <string>conj|CC</string>
              <string>det|DET</string>
              <string>modal|MD</string>
              <string>noun|NN</string>
              <string>prep|IN</string>
              <string>pron|PRP</string>
              <string>verb|VB</string>
            </array>
          </value>
        </nameValuePair>
        <nameValuePair>
          <name>LemmaCacheFileLocation</name>
          <value>
            <string>org/apache/ctakes/lvg/2005_lemma.voc</string>
          </value>
        </nameValuePair>
        <nameValuePair>
          <name>UseLemmaCache</name>
          <value>
            <boolean>false</boolean>
          </value>
        </nameValuePair>
        <nameValuePair>
          <name>LemmaCacheFrequencyCutoff</name>
          <value>
            <integer>20</integer>
          </value>
        </nameValuePair>
        <nameValuePair>
          <name>PostLemmas</name>
          <value>
            <boolean>false</boolean>
          </value>
        </nameValuePair>
      </configurationParameterSettings>
      <typeSystemDescription/>
      <typePriorities/>
      <fsIndexCollection/>
      <capabilities>
        <capability>
          <inputs>
            <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken</type>
          </inputs>
          <outputs>
            <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken</type>
          </outputs>
          <languagesSupported/>
        </capability>
      </capabilities>
      <operationalProperties>
        <modifiesCas>true</modifiesCas>
        <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
        <outputsNewCASes>false</outputsNewCASes>
      </operationalProperties>
    </analysisEngineMetaData>
    <!-- The annotator requires (optional=false) a resource under the key LvgCmdApi. -->
    <externalResourceDependencies>
      <externalResourceDependency>
        <key>LvgCmdApi</key>
        <description/>
        <interfaceName>org.apache.ctakes.lvg.resource.LvgCmdApiResource</interfaceName>
        <optional>false</optional>
      </externalResourceDependency>
    </externalResourceDependencies>
    <!-- Defines the LvgCmdApi resource and binds it to the dependency key of the same name. -->
    <resourceManagerConfiguration>
      <externalResources>
        <externalResource>
          <name>LvgCmdApi</name>
          <description/>
          <fileResourceSpecifier>
            <fileUrl>file:org/apache/ctakes/lvg/data/config/lvg.properties</fileUrl>
          </fileResourceSpecifier>
          <implementationName>org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl</implementationName>
        </externalResource>
      </externalResources>
      <externalResourceBindings>
        <externalResourceBinding>
          <key>LvgCmdApi</key>
          <resourceName>LvgCmdApi</resourceName>
        </externalResourceBinding>
      </externalResourceBindings>
    </resourceManagerConfiguration>
  </analysisEngineDescription>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="ClearNLPSemanticRoleLabelerAE">
  <!--
    Primitive delegate wrapping ClearNLPSemanticRoleLabelerAE.
    Its capability block declares BaseToken features and the
    ConllDependencyNode type as inputs, and declares no outputs.
  -->
  <analysisEngineDescription>
    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
    <primitive>true</primitive>
    <annotatorImplementationName>org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE</annotatorImplementationName>
    <analysisEngineMetaData>
      <name>ClearNLPSemanticRoleLabelerAE</name>
      <description>Descriptor automatically generated by uimaFIT</description>
      <version>unknown</version>
      <vendor>org.apache.ctakes.dependency.parser.ae</vendor>
      <configurationParameters>
        <!-- Optional; no value is set in the settings section below. -->
        <configurationParameter>
          <name>ParserModelFileName</name>
          <description>This parameter provides the file name of the semantic role labeler model required by the factory method provided by ClearNLPUtil.</description>
          <type>String</type>
          <multiValued>false</multiValued>
          <mandatory>false</mandatory>
        </configurationParameter>
        <!-- The BaseToken feature holding the normalized form is named normalizedForm. -->
        <configurationParameter>
          <name>UseLemmatizer</name>
          <description>If true, use the default ClearNLP lemmatizer, otherwise use lemmas from the BaseToken normalizedForm feature</description>
          <type>Boolean</type>
          <multiValued>false</multiValued>
          <mandatory>false</mandatory>
        </configurationParameter>
      </configurationParameters>
      <configurationParameterSettings>
        <nameValuePair>
          <name>UseLemmatizer</name>
          <value>
            <boolean>true</boolean>
          </value>
        </nameValuePair>
      </configurationParameterSettings>
      <typeSystemDescription>
        <imports>
          <import name="org.apache.ctakes.typesystem.types.TypeSystem"/>
        </imports>
      </typeSystemDescription>
      <fsIndexCollection/>
      <capabilities>
        <capability>
          <inputs>
            <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:partOfSpeech</feature>
            <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:tokenNumber</feature>
            <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:end</feature>
            <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:begin</feature>
            <type>org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode</type>
          </inputs>
          <outputs/>
          <languagesSupported/>
        </capability>
      </capabilities>
      <operationalProperties>
        <modifiesCas>true</modifiesCas>
        <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
        <outputsNewCASes>false</outputsNewCASes>
      </operationalProperties>
    </analysisEngineMetaData>
  </analysisEngineDescription>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="SimpleSegmentAnnotator">
  <!--
    Primitive delegate wrapping SimpleSegmentAnnotator, which declares the
    Segment type as its only output.
    Uses analysisEngineDescription (rather than the legacy taeDescription
    element) so that it matches the other delegates in this aggregate.
  -->
  <analysisEngineDescription>
    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
    <primitive>true</primitive>
    <annotatorImplementationName>org.apache.ctakes.core.ae.SimpleSegmentAnnotator</annotatorImplementationName>
    <analysisEngineMetaData>
      <name>SimpleSegmentAnnotator</name>
      <description>Creates a single Segment annotation, encompassing the entire document. For use prior to annotators that require a Segment annotation, when the input document is not in CDA/does not have another annotator that creates Segment annotations.</description>
      <version>2.2</version>
      <vendor>Mayo Clinic</vendor>
      <configurationParameters>
        <!-- Optional and undocumented; no value is assigned in this descriptor. -->
        <configurationParameter>
          <name>SegmentID</name>
          <description/>
          <type>String</type>
          <multiValued>false</multiValued>
          <mandatory>false</mandatory>
        </configurationParameter>
      </configurationParameters>
      <configurationParameterSettings/>
      <typeSystemDescription/>
      <typePriorities/>
      <fsIndexCollection/>
      <capabilities>
        <capability>
          <inputs/>
          <outputs>
            <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Segment</type>
          </outputs>
          <languagesSupported/>
        </capability>
      </capabilities>
      <operationalProperties>
        <modifiesCas>true</modifiesCas>
        <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
        <outputsNewCASes>false</outputsNewCASes>
      </operationalProperties>
    </analysisEngineMetaData>
    <resourceManagerConfiguration/>
  </analysisEngineDescription>
</delegateAnalysisEngine>
<delegateAnalysisEngine key="POSTagger">
  <!--
    Primitive delegate wrapping the cTAKES POSTagger annotator.
    Declares Sentence and BaseToken as inputs and BaseToken as output.
  -->
  <analysisEngineDescription>
    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
    <primitive>true</primitive>
    <annotatorImplementationName>org.apache.ctakes.postagger.POSTagger</annotatorImplementationName>
    <analysisEngineMetaData>
      <name>POSTagger</name>
      <description/>
      <version>1.0</version>
      <vendor/>

      <!-- Parameter declarations: only PosModelFile is mandatory. -->
      <configurationParameters>
        <configurationParameter>
          <name>PosModelFile</name>
          <description>The file that contains the MaxEnt model used by the part of speech (POS) tagger</description>
          <type>String</type>
          <multiValued>false</multiValued>
          <mandatory>true</mandatory>
        </configurationParameter>
        <configurationParameter>
          <name>TagDictionary</name>
          <description>The file contains a list of words, and for each word, the set of part of speech tags to be considered for that word. For words in the tag dictionary, only the corresponding tags in the tag dictionary are considered when tagging the word with a part of speech.</description>
          <type>String</type>
          <multiValued>false</multiValued>
          <mandatory>false</mandatory>
        </configurationParameter>
        <!-- Declared without a description. -->
        <configurationParameter>
          <name>CaseSensitive</name>
          <type>Boolean</type>
          <multiValued>false</multiValued>
          <mandatory>false</mandatory>
        </configurationParameter>
      </configurationParameters>

      <!-- Values for all three parameters, listed in declaration order. -->
      <configurationParameterSettings>
        <nameValuePair>
          <name>PosModelFile</name>
          <value>
            <string>org/apache/ctakes/postagger/models/mayo-pos.zip</string>
          </value>
        </nameValuePair>
        <nameValuePair>
          <name>TagDictionary</name>
          <value>
            <string>org/apache/ctakes/postagger/models/tag.dictionary.txt</string>
          </value>
        </nameValuePair>
        <nameValuePair>
          <name>CaseSensitive</name>
          <value>
            <boolean>true</boolean>
          </value>
        </nameValuePair>
      </configurationParameterSettings>

      <typeSystemDescription/>
      <typePriorities/>
      <fsIndexCollection/>

      <capabilities>
        <capability>
          <inputs>
            <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Sentence</type>
            <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
          </inputs>
          <outputs>
            <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
          </outputs>
          <languagesSupported/>
        </capability>
      </capabilities>

      <operationalProperties>
        <modifiesCas>true</modifiesCas>
        <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
        <outputsNewCASes>false</outputsNewCASes>
      </operationalProperties>
    </analysisEngineMetaData>
    <resourceManagerConfiguration/>
  </analysisEngineDescription>
</delegateAnalysisEngine>
</delegateAnalysisEngineSpecifiers>
<analysisEngineMetaData>
  <!--
    Metadata for the aggregate itself: the name is empty, and no parameters,
    settings or capabilities are declared at this level.
  -->
  <name/>
  <configurationParameters/>
  <configurationParameterSettings/>

  <!--
    Fixed flow: each node refers to a delegate by its key, and the
    delegates are listed in the order segment annotation, LVG,
    POS tagging, dependency parsing, semantic role labeling.
  -->
  <flowConstraints>
    <fixedFlow>
      <node>SimpleSegmentAnnotator</node>
      <node>LVG Annotator</node>
      <node>POSTagger</node>
      <node>ClearNLPDependencyParserAE</node>
      <node>ClearNLPSemanticRoleLabelerAE</node>
    </fixedFlow>
  </flowConstraints>

  <capabilities/>

  <operationalProperties>
    <modifiesCas>true</modifiesCas>
    <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
    <outputsNewCASes>false</outputsNewCASes>
  </operationalProperties>
</analysisEngineMetaData>
</analysisEngineDescription>