ctakes-dependency-parser/desc/analysis_engine/ClearNLPSRLTokenizedInfPosAggregate.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Aggregate analysis engine descriptor.

  Declares five primitive delegates and runs them in the fixed order given in
  flowConstraints at the bottom of this file:
    SimpleSegmentAnnotator, LVG Annotator, POSTagger,
    ClearNLPDependencyParserAE, ClearNLPSemanticRoleLabelerAE.

  NOTE(review): POSTagger declares Sentence and BaseToken as inputs, but no
  delegate in this aggregate declares them as outputs. Presumably they must
  already be present in the CAS when this aggregate runs; confirm against the
  calling pipeline.
-->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
  <primitive>false</primitive>
  <delegateAnalysisEngineSpecifiers>

    <!-- Delegate: dependency parser. Only ParserAlgorithmName and UseLemmatizer are given values here. -->
    <delegateAnalysisEngine key="ClearNLPDependencyParserAE">
      <analysisEngineDescription>
        <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
        <primitive>true</primitive>
        <annotatorImplementationName>org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE</annotatorImplementationName>
        <analysisEngineMetaData>
          <name>ClearNLPDependencyParserAE</name>
          <description>Descriptor automatically generated by uimaFIT</description>
          <version>unknown</version>
          <vendor>org.apache.ctakes.dependency.parser.ae</vendor>
          <configurationParameters>
            <configurationParameter>
              <name>ParserModelFileName</name>
              <description>This parameter provides the file name of the dependency parser model required by the factory method provided by ClearNLPUtil. If not specified, this analysis engine will use a default model from the resources directory</description>
              <type>String</type>
              <multiValued>false</multiValued>
              <mandatory>false</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>ParserAlgorithmName</name>
              <description>This parameter provides the algorithm name used by the dependency parser that is required by the factory method provided by ClearNLPUtil. If in doubt, do not change from the default value.</description>
              <type>String</type>
              <multiValued>false</multiValued>
              <mandatory>true</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>UseLemmatizer</name>
              <description>If true, use the default ClearNLP lemmatizer, otherwise use lemmas from the BaseToken normalizedForm field</description>
              <type>Boolean</type>
              <multiValued>false</multiValued>
              <mandatory>false</mandatory>
            </configurationParameter>
          </configurationParameters>
          <!-- ParserModelFileName is left unset; per its description the default model is then used. -->
          <configurationParameterSettings>
            <nameValuePair>
              <name>ParserAlgorithmName</name>
              <value>
                <string>shift-pop</string>
              </value>
            </nameValuePair>
            <nameValuePair>
              <name>UseLemmatizer</name>
              <value>
                <boolean>true</boolean>
              </value>
            </nameValuePair>
          </configurationParameterSettings>
          <typeSystemDescription>
            <imports>
              <import name="org.apache.ctakes.typesystem.types.TypeSystem"/>
            </imports>
          </typeSystemDescription>
          <fsIndexCollection/>
          <capabilities>
            <capability>
              <inputs>
                <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:partOfSpeech</feature>
                <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:normalizedForm</feature>
                <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:tokenNumber</feature>
                <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:end</feature>
                <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:begin</feature>
              </inputs>
              <outputs/>
              <languagesSupported/>
            </capability>
          </capabilities>
          <operationalProperties>
            <modifiesCas>true</modifiesCas>
            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
            <outputsNewCASes>false</outputsNewCASes>
          </operationalProperties>
        </analysisEngineMetaData>
      </analysisEngineDescription>
    </delegateAnalysisEngine>

    <!-- Delegate: LVG annotator. Declares WordToken as both input and output and depends on the external resource LvgCmdApi. -->
    <delegateAnalysisEngine key="LVG Annotator">
      <analysisEngineDescription>
        <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
        <primitive>true</primitive>
        <annotatorImplementationName>org.apache.ctakes.lvg.ae.LvgBaseTokenAnnotator</annotatorImplementationName>
        <analysisEngineMetaData>
          <name>LVG Annotator</name>
          <description/>
          <version/>
          <vendor/>
          <configurationParameters>
            <configurationParameter>
              <name>UseSegments</name>
              <description>Flag whether to use segments or full doc text.</description>
              <type>Boolean</type>
              <multiValued>false</multiValued>
              <mandatory>false</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>SegmentsToSkip</name>
              <description>Segments to skip.</description>
              <type>String</type>
              <multiValued>true</multiValued>
              <mandatory>false</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>UseCmdCache</name>
              <description>Flag whether to use LVG cache.</description>
              <type>Boolean</type>
              <multiValued>false</multiValued>
              <mandatory>true</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>CmdCacheFileLocation</name>
              <description>Location of LVG cache file containing LvgCmdApi data.</description>
              <type>String</type>
              <multiValued>false</multiValued>
              <mandatory>true</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>CmdCacheFrequencyCutoff</name>
              <description>Cutoff frequency for items that get loaded into RAM from cache.</description>
              <type>Integer</type>
              <multiValued>false</multiValued>
              <mandatory>true</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>ExclusionSet</name>
              <description>Set of words that LVG should not run on.</description>
              <type>String</type>
              <multiValued>true</multiValued>
              <mandatory>false</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>XeroxTreebankMap</name>
              <type>String</type>
              <multiValued>true</multiValued>
              <mandatory>true</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>PostLemmas</name>
              <description>This parameter determines whether the feature lemmaEntries will be populated for word annotations.</description>
              <type>Boolean</type>
              <multiValued>false</multiValued>
              <mandatory>true</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>UseLemmaCache</name>
              <description>This parameter determines whether a cache will be used to improve performance of setting lemma entries.</description>
              <type>Boolean</type>
              <multiValued>false</multiValued>
              <mandatory>true</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>LemmaCacheFileLocation</name>
              <description>This parameter determines where the lemma cache is located.</description>
              <type>String</type>
              <multiValued>false</multiValued>
              <mandatory>true</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>LemmaCacheFrequencyCutoff</name>
              <description>This parameter sets a threshold for the frequency of a lemma to be loaded into the cache.</description>
              <type>Integer</type>
              <multiValued>false</multiValued>
              <mandatory>true</mandatory>
            </configurationParameter>
          </configurationParameters>
          <!--
            UseCmdCache, UseLemmaCache and PostLemmas are all false here. The cache file
            locations and cutoffs are still given values because those parameters are
            declared mandatory above. NOTE(review): presumably they are not read while
            the caches are disabled; confirm against LvgBaseTokenAnnotator.
          -->
          <configurationParameterSettings>
            <nameValuePair>
              <name>UseSegments</name>
              <value>
                <boolean>false</boolean>
              </value>
            </nameValuePair>
            <nameValuePair>
              <name>SegmentsToSkip</name>
              <value>
                <array>
                  <string/>
                </array>
              </value>
            </nameValuePair>
            <nameValuePair>
              <name>UseCmdCache</name>
              <value>
                <boolean>false</boolean>
              </value>
            </nameValuePair>
            <nameValuePair>
              <name>CmdCacheFileLocation</name>
              <value>
                <string>org/apache/ctakes/lvg/2005_norm.voc</string>
              </value>
            </nameValuePair>
            <nameValuePair>
              <name>CmdCacheFrequencyCutoff</name>
              <value>
                <integer>20</integer>
              </value>
            </nameValuePair>
            <!-- Each excluded word is listed in lower-case and capitalized form. -->
            <nameValuePair>
              <name>ExclusionSet</name>
              <value>
                <array>
                  <string>and</string>
                  <string>And</string>
                  <string>by</string>
                  <string>By</string>
                  <string>for</string>
                  <string>For</string>
                  <string>in</string>
                  <string>In</string>
                  <string>of</string>
                  <string>Of</string>
                  <string>on</string>
                  <string>On</string>
                  <string>the</string>
                  <string>The</string>
                  <string>to</string>
                  <string>To</string>
                  <string>with</string>
                  <string>With</string>
                </array>
              </value>
            </nameValuePair>
            <!-- Entries are pipe-separated pairs; NOTE(review): presumably "category|Treebank tag", confirm against the annotator. -->
            <nameValuePair>
              <name>XeroxTreebankMap</name>
              <value>
                <array>
                  <string>adj|JJ</string>
                  <string>adv|RB</string>
                  <string>aux|AUX</string>
                  <string>compl|CS</string>
                  <string>conj|CC</string>
                  <string>det|DET</string>
                  <string>modal|MD</string>
                  <string>noun|NN</string>
                  <string>prep|IN</string>
                  <string>pron|PRP</string>
                  <string>verb|VB</string>
                </array>
              </value>
            </nameValuePair>
            <nameValuePair>
              <name>LemmaCacheFileLocation</name>
              <value>
                <string>org/apache/ctakes/lvg/2005_lemma.voc</string>
              </value>
            </nameValuePair>
            <nameValuePair>
              <name>UseLemmaCache</name>
              <value>
                <boolean>false</boolean>
              </value>
            </nameValuePair>
            <nameValuePair>
              <name>LemmaCacheFrequencyCutoff</name>
              <value>
                <integer>20</integer>
              </value>
            </nameValuePair>
            <nameValuePair>
              <name>PostLemmas</name>
              <value>
                <boolean>false</boolean>
              </value>
            </nameValuePair>
          </configurationParameterSettings>
          <typeSystemDescription/>
          <typePriorities/>
          <fsIndexCollection/>
          <capabilities>
            <capability>
              <inputs>
                <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken</type>
              </inputs>
              <outputs>
                <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken</type>
              </outputs>
              <languagesSupported/>
            </capability>
          </capabilities>
          <operationalProperties>
            <modifiesCas>true</modifiesCas>
            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
            <outputsNewCASes>false</outputsNewCASes>
          </operationalProperties>
        </analysisEngineMetaData>
        <!-- The LvgCmdApi dependency declared here is bound below to a resource of the same name backed by lvg.properties. -->
        <externalResourceDependencies>
          <externalResourceDependency>
            <key>LvgCmdApi</key>
            <description/>
            <interfaceName>org.apache.ctakes.lvg.resource.LvgCmdApiResource</interfaceName>
            <optional>false</optional>
          </externalResourceDependency>
        </externalResourceDependencies>
        <resourceManagerConfiguration>
          <externalResources>
            <externalResource>
              <name>LvgCmdApi</name>
              <description/>
              <fileResourceSpecifier>
                <fileUrl>file:org/apache/ctakes/lvg/data/config/lvg.properties</fileUrl>
              </fileResourceSpecifier>
              <implementationName>org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl</implementationName>
            </externalResource>
          </externalResources>
          <externalResourceBindings>
            <externalResourceBinding>
              <key>LvgCmdApi</key>
              <resourceName>LvgCmdApi</resourceName>
            </externalResourceBinding>
          </externalResourceBindings>
        </resourceManagerConfiguration>
      </analysisEngineDescription>
    </delegateAnalysisEngine>

    <!-- Delegate: semantic role labeler. Declares ConllDependencyNode as an input, so it is placed after the dependency parser in the flow. -->
    <delegateAnalysisEngine key="ClearNLPSemanticRoleLabelerAE">
      <analysisEngineDescription>
        <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
        <primitive>true</primitive>
        <annotatorImplementationName>org.apache.ctakes.dependency.parser.ae.ClearNLPSemanticRoleLabelerAE</annotatorImplementationName>
        <analysisEngineMetaData>
          <name>ClearNLPSemanticRoleLabelerAE</name>
          <description>Descriptor automatically generated by uimaFIT</description>
          <version>unknown</version>
          <vendor>org.apache.ctakes.dependency.parser.ae</vendor>
          <configurationParameters>
            <configurationParameter>
              <name>ParserModelFileName</name>
              <description>This parameter provides the file name of the semantic role labeler model required by the factory method provided by ClearNLPUtil.</description>
              <type>String</type>
              <multiValued>false</multiValued>
              <mandatory>false</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>UseLemmatizer</name>
              <description>If true, use the default ClearNLP lemmatizer, otherwise use lemmas from the BaseToken normalizedForm field</description>
              <type>Boolean</type>
              <multiValued>false</multiValued>
              <mandatory>false</mandatory>
            </configurationParameter>
          </configurationParameters>
          <configurationParameterSettings>
            <nameValuePair>
              <name>UseLemmatizer</name>
              <value>
                <boolean>true</boolean>
              </value>
            </nameValuePair>
          </configurationParameterSettings>
          <typeSystemDescription>
            <imports>
              <import name="org.apache.ctakes.typesystem.types.TypeSystem"/>
            </imports>
          </typeSystemDescription>
          <fsIndexCollection/>
          <capabilities>
            <capability>
              <inputs>
                <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:partOfSpeech</feature>
                <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:tokenNumber</feature>
                <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:end</feature>
                <feature>org.apache.ctakes.typesystem.type.syntax.BaseToken:begin</feature>
                <type>org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode</type>
              </inputs>
              <outputs/>
              <languagesSupported/>
            </capability>
          </capabilities>
          <operationalProperties>
            <modifiesCas>true</modifiesCas>
            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
            <outputsNewCASes>false</outputsNewCASes>
          </operationalProperties>
        </analysisEngineMetaData>
      </analysisEngineDescription>
    </delegateAnalysisEngine>

    <!-- Delegate: segment annotator. SegmentID is declared but given no value in this descriptor. -->
    <delegateAnalysisEngine key="SimpleSegmentAnnotator">
      <analysisEngineDescription>
        <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
        <primitive>true</primitive>
        <annotatorImplementationName>org.apache.ctakes.core.ae.SimpleSegmentAnnotator</annotatorImplementationName>
        <analysisEngineMetaData>
          <name>SimpleSegmentAnnotator</name>
          <description>Creates a single Segment annotation, encompassing the entire document. For use prior to annotators that require a Segment annotation, when the input document is not in CDA/does not have another annotator that creates Segment annotations.</description>
          <version>2.2</version>
          <vendor>Mayo Clinic</vendor>
          <configurationParameters>
            <configurationParameter>
              <name>SegmentID</name>
              <description/>
              <type>String</type>
              <multiValued>false</multiValued>
              <mandatory>false</mandatory>
            </configurationParameter>
          </configurationParameters>
          <configurationParameterSettings/>
          <typeSystemDescription/>
          <typePriorities/>
          <fsIndexCollection/>
          <capabilities>
            <capability>
              <inputs/>
              <outputs>
                <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Segment</type>
              </outputs>
              <languagesSupported/>
            </capability>
          </capabilities>
          <operationalProperties>
            <modifiesCas>true</modifiesCas>
            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
            <outputsNewCASes>false</outputsNewCASes>
          </operationalProperties>
        </analysisEngineMetaData>
        <resourceManagerConfiguration/>
      </analysisEngineDescription>
    </delegateAnalysisEngine>

    <!-- Delegate: part-of-speech tagger. Declares Sentence and BaseToken as inputs and BaseToken as output. -->
    <delegateAnalysisEngine key="POSTagger">
      <analysisEngineDescription>
        <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
        <primitive>true</primitive>
        <annotatorImplementationName>org.apache.ctakes.postagger.POSTagger</annotatorImplementationName>
        <analysisEngineMetaData>
          <name>POSTagger</name>
          <description/>
          <version>1.0</version>
          <vendor/>
          <configurationParameters>
            <configurationParameter>
              <name>PosModelFile</name>
              <description>The file that contains the MaxEnt model used by the part of speech (POS) tagger</description>
              <type>String</type>
              <multiValued>false</multiValued>
              <mandatory>true</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>TagDictionary</name>
              <description>The file contains a list of words, and for each word, the set of part of speech tags to be considered for that word. For words in the tag dictionary, only the corresponding tags in the tag dictionary are considered when tagging the word with a part of speech.</description>
              <type>String</type>
              <multiValued>false</multiValued>
              <mandatory>false</mandatory>
            </configurationParameter>
            <configurationParameter>
              <name>CaseSensitive</name>
              <type>Boolean</type>
              <multiValued>false</multiValued>
              <mandatory>false</mandatory>
            </configurationParameter>
          </configurationParameters>
          <configurationParameterSettings>
            <nameValuePair>
              <name>PosModelFile</name>
              <value>
                <string>org/apache/ctakes/postagger/models/mayo-pos.zip</string>
              </value>
            </nameValuePair>
            <nameValuePair>
              <name>CaseSensitive</name>
              <value>
                <boolean>true</boolean>
              </value>
            </nameValuePair>
            <nameValuePair>
              <name>TagDictionary</name>
              <value>
                <string>org/apache/ctakes/postagger/models/tag.dictionary.txt</string>
              </value>
            </nameValuePair>
          </configurationParameterSettings>
          <typeSystemDescription/>
          <typePriorities/>
          <fsIndexCollection/>
          <capabilities>
            <capability>
              <inputs>
                <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Sentence</type>
                <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
              </inputs>
              <outputs>
                <type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
              </outputs>
              <languagesSupported/>
            </capability>
          </capabilities>
          <operationalProperties>
            <modifiesCas>true</modifiesCas>
            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
            <outputsNewCASes>false</outputsNewCASes>
          </operationalProperties>
        </analysisEngineMetaData>
        <resourceManagerConfiguration/>
      </analysisEngineDescription>
    </delegateAnalysisEngine>

  </delegateAnalysisEngineSpecifiers>

  <!-- Aggregate-level metadata: no name, parameters or capabilities are declared; only the execution order. -->
  <analysisEngineMetaData>
    <name/>
    <configurationParameters/>
    <configurationParameterSettings/>
    <flowConstraints>
      <!-- Each node value must match a delegateAnalysisEngine key declared above. -->
      <fixedFlow>
        <node>SimpleSegmentAnnotator</node>
        <node>LVG Annotator</node>
        <node>POSTagger</node>
        <node>ClearNLPDependencyParserAE</node>
        <node>ClearNLPSemanticRoleLabelerAE</node>
      </fixedFlow>
    </flowConstraints>
    <capabilities/>
    <operationalProperties>
      <modifiesCas>true</modifiesCas>
      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
      <outputsNewCASes>false</outputsNewCASes>
    </operationalProperties>
  </analysisEngineMetaData>
</analysisEngineDescription>

ctakes-dependency-parser/desc/analysis_engine/ClearNLPSRLTokenizedInfPosAggregate.xml (516 lines of code) (raw):