odps/static/algorithms/feature.xml (132 lines of code) (raw):
<?xml version='1.0' encoding='UTF-8'?>
<algorithms baseClass="BaseTrainingAlgorithm">
<!--
  RFImportance: metrics algorithm that computes feature importance for a
  random forest model (xflow "feature_importance" in project "algo_public").
  Inputs : port "input" (labelled data table) and port "model" (the model).
  Output : port "output", bound to outputTableName through
           get_output_table_name, hence typed DATA like the output port of
           GBDTImportance in this file.
  The "calculator" meta names the function that post-processes the result.
  NOTE(review): inputTablePartitions is exported with
  get_input_table_partitions here, while RegressionImportance and
  SelectFeatures use get_input_partitions; confirm both exporters exist.
-->
<algorithm codeName="RFImportance">
  <baseClass>BaseMetricsAlgorithm</baseClass>
  <exportFunction>true</exportFunction>
  <docs><![CDATA[
计算随机森林模型中特征重要性。
%params%
]]></docs>
  <params>
    <param name="inputTableName" required="true">
      <exporter>get_input_table_name</exporter>
      <inputName>input</inputName>
    </param>
    <param name="inputTablePartitions">
      <exporter>get_input_table_partitions</exporter>
      <inputName>input</inputName>
    </param>
    <param name="featureColNames" required="true">
      <alias>featureCols</alias>
      <exporter>get_feature_columns</exporter>
      <inputName>input</inputName>
      <docs>names of feature columns</docs>
    </param>
    <param name="labelColName" required="true">
      <alias>labelCol</alias>
      <exporter>get_label_column</exporter>
      <inputName>input</inputName>
      <docs>name of label column in input table</docs>
    </param>
    <param name="modelName" required="true">
      <exporter>get_input_model_name</exporter>
      <inputName>model</inputName>
    </param>
    <param name="outputTableName">
      <exporter>get_output_table_name</exporter>
      <outputName>output</outputName>
    </param>
  </params>
  <ports>
    <port name="input">
      <ioType>INPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
      <docs>train set with defined label</docs>
    </port>
    <port name="model">
      <ioType>INPUT</ioType>
      <sequence>2</sequence>
      <type>MODEL</type>
      <docs>random forests model</docs>
    </port>
    <port name="output">
      <ioType>OUTPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
      <docs>output table with feature importance</docs>
    </port>
  </ports>
  <metas>
    <meta name="xflowName" value="feature_importance"/>
    <meta name="xflowProjectName" value="algo_public"/>
    <meta name="calculator" value="$package_root.feature._customize.get_rf_importance"/>
  </metas>
</algorithm>
<!--
  GBDTImportance: metrics algorithm that computes feature importance for a
  GBDT model (xflow "gbdt_importance" in project "algo_public").
  Inputs : port "input" (labelled data table) and port "model" (the model).
  Output : port "output", a DATA table bound to outputTableName.
  The "calculator" meta names the function that post-processes the result.
  NOTE(review): inputTablePartitions is exported with
  get_input_table_partitions here, while RegressionImportance and
  SelectFeatures use get_input_partitions; confirm both exporters exist.
-->
<algorithm codeName="GBDTImportance">
  <baseClass>BaseMetricsAlgorithm</baseClass>
  <exportFunction>true</exportFunction>
  <docs><![CDATA[
计算 GBDT 模型中特征重要性。
%params%
]]></docs>
  <params>
    <param name="inputTableName" required="true">
      <exporter>get_input_table_name</exporter>
      <inputName>input</inputName>
    </param>
    <param name="inputTablePartitions">
      <exporter>get_input_table_partitions</exporter>
      <inputName>input</inputName>
    </param>
    <param name="labelColName" required="true">
      <alias>labelCol</alias>
      <exporter>get_label_column</exporter>
      <inputName>input</inputName>
      <docs>name of label column in input table</docs>
    </param>
    <param name="modelName" required="true">
      <exporter>get_input_model_name</exporter>
      <inputName>model</inputName>
    </param>
    <param name="outputTableName">
      <exporter>get_output_table_name</exporter>
      <outputName>output</outputName>
    </param>
    <param name="featureColNames" required="true">
      <alias>featureCols</alias>
      <exporter>get_feature_columns</exporter>
      <inputName>input</inputName>
      <docs>names of feature columns</docs>
    </param>
  </params>
  <ports>
    <port name="input">
      <ioType>INPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
      <docs>train set with defined label</docs>
    </port>
    <port name="model">
      <ioType>INPUT</ioType>
      <sequence>2</sequence>
      <type>MODEL</type>
      <docs>GBDT model</docs>
    </port>
    <port name="output">
      <ioType>OUTPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
      <docs>output table with feature importance</docs>
    </port>
  </ports>
  <metas>
    <meta name="xflowName" value="gbdt_importance"/>
    <meta name="xflowProjectName" value="algo_public"/>
    <meta name="calculator" value="$package_root.feature._customize.get_gbdt_importance"/>
  </metas>
</algorithm>
<!--
  RegressionImportance: metrics algorithm that computes feature importance for
  linear and binary-classification regression models
  (xflow "regression_feature_importance" in project "algo_public").
  Inputs : port "input" (labelled data table) and port "model" (the model).
  Output : port "output", bound to outputTableName through
           get_output_table_name, hence typed DATA like the output port of
           GBDTImportance in this file.
  The itemDelimiter / kvDelimiter / enableSparse parameters are exported from
  the input and describe key-value (sparse) formatted data.
  NOTE(review): featureColNames is optional here but required in RFImportance
  and GBDTImportance; confirm this difference is intentional.
-->
<algorithm codeName="RegressionImportance">
  <baseClass>BaseMetricsAlgorithm</baseClass>
  <exportFunction>true</exportFunction>
  <docs><![CDATA[
计算线性和二分类回归模型中特征重要性。
%params%
]]></docs>
  <params>
    <param name="inputTableName" required="true">
      <exporter>get_input_table_name</exporter>
      <inputName>input</inputName>
    </param>
    <param name="inputTablePartitions">
      <exporter>get_input_partitions</exporter>
      <inputName>input</inputName>
    </param>
    <param name="labelColName" required="true">
      <alias>labelCol</alias>
      <exporter>get_label_column</exporter>
      <inputName>input</inputName>
      <docs>name of label column in input table</docs>
    </param>
    <param name="modelName" required="true">
      <exporter>get_input_model_name</exporter>
      <inputName>model</inputName>
    </param>
    <param name="outputTableName">
      <exporter>get_output_table_name</exporter>
      <outputName>output</outputName>
    </param>
    <param name="featureColNames">
      <alias>featureCols</alias>
      <exporter>get_feature_columns</exporter>
      <inputName>input</inputName>
      <docs>输入表中用于训练的特征的列名</docs>
    </param>
    <param name="itemDelimiter">
      <exporter>get_item_delimiter</exporter>
      <inputName>input</inputName>
      <docs>item(key value对)之间的分隔符</docs>
    </param>
    <param name="kvDelimiter">
      <exporter>get_kv_delimiter</exporter>
      <inputName>input</inputName>
      <docs>表中每个item的key和value之间的分隔符</docs>
    </param>
    <param name="enableSparse">
      <exporter>get_enable_sparse</exporter>
      <inputName>input</inputName>
      <docs>是否稀疏数据</docs>
    </param>
  </params>
  <ports>
    <port name="input">
      <ioType>INPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
      <docs>train set with defined label</docs>
    </port>
    <port name="model">
      <ioType>INPUT</ioType>
      <sequence>2</sequence>
      <type>MODEL</type>
      <docs>regression model</docs>
    </port>
    <port name="output">
      <ioType>OUTPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
      <docs>output table with feature importance</docs>
    </port>
  </ports>
  <metas>
    <meta name="xflowName" value="regression_feature_importance"/>
    <meta name="xflowProjectName" value="algo_public"/>
    <meta name="calculator" value="$package_root.feature._customize.get_regression_importance"/>
  </metas>
</algorithm>
<!--
  SelectFeatures: process algorithm wrapping xflow "fe_select_runner" in
  project "algo_public".
  Input  : port "input" (data table).
  Outputs: port "output" (table with the selected features; its schema is
           dynamic and declared from {labelCol}) and port "importance"
           (table with columns featname: string, weight: double).
  Defaults declared here: selectMethod = iv, topN = 10.
  categoryCols, maxBins, selectMethod and topN have no exporter, so they are
  presumably supplied directly by the caller; verify against the loader.
-->
<algorithm codeName="SelectFeatures">
  <baseClass>BaseProcessAlgorithm</baseClass>
  <exportFunction>true</exportFunction>
  <params>
    <param name="inputTable" required="true">
      <exporter>get_input_table_name</exporter>
      <inputName>input</inputName>
    </param>
    <param name="inputTablePartitions">
      <exporter>get_input_partitions</exporter>
      <inputName>input</inputName>
    </param>
    <param name="outputTable">
      <exporter>get_output_table_name</exporter>
      <outputName>output</outputName>
    </param>
    <param name="featImportanceTable" required="true">
      <exporter>get_output_table_name</exporter>
      <outputName>importance</outputName>
    </param>
    <param name="labelCol" required="true">
      <exporter>get_label_column</exporter>
      <inputName>input</inputName>
      <docs>标签列/目标列</docs>
    </param>
    <param name="selectedCols" required="true">
      <alias>featureCols</alias>
      <exporter>get_feature_columns</exporter>
      <inputName>input</inputName>
      <docs>特征列</docs>
    </param>
    <param name="categoryCols">
      <inputName>input</inputName>
      <docs>存在在Int或者Double字符的枚举特征</docs>
    </param>
    <param name="maxBins">
      <docs>连续类特征划分最大区间数</docs>
    </param>
    <param name="selectMethod">
      <value>iv</value>
      <docs>特征选择方法</docs>
    </param>
    <param name="topN">
      <value>10</value>
      <docs>挑选的TopN个特征,如果topN大于输入特征数,则输出所有特征</docs>
    </param>
    <param name="itemSpliter">
      <alias>itemDelimiter</alias>
      <exporter>get_item_delimiter</exporter>
      <inputName>input</inputName>
      <docs>item(key value对)之间的分隔符</docs>
    </param>
    <param name="kvSpliter">
      <alias>kvDelimiter</alias>
      <exporter>get_kv_delimiter</exporter>
      <inputName>input</inputName>
      <docs>表中每个item的key和value之间的分隔符</docs>
    </param>
    <param name="isSparse">
      <alias>enableSparse</alias>
      <exporter>get_enable_sparse</exporter>
      <inputName>input</inputName>
      <docs>是否稀疏数据</docs>
    </param>
  </params>
  <ports>
    <port name="input">
      <ioType>INPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
      <docs>train set with input data</docs>
    </port>
    <port name="output">
      <ioType>OUTPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
      <docs>output table with selected features</docs>
      <schema>
        <schema>{labelCol}</schema>
        <dynamic>true</dynamic>
      </schema>
    </port>
    <port name="importance">
      <ioType>OUTPUT</ioType>
      <sequence>2</sequence>
      <type>DATA</type>
      <docs>importance table</docs>
      <schema>
        <schema>featname: string, weight: double</schema>
      </schema>
    </port>
  </ports>
  <metas>
    <meta name="xflowName" value="fe_select_runner"/>
    <meta name="xflowProjectName" value="algo_public"/>
  </metas>
</algorithm>
</algorithms>