<?xml version='1.0' encoding='UTF-8'?>
<!-- Source path: odps/static/algorithms/preprocess.xml -->
<algorithms baseClass="BaseProcessAlgorithm">
<!--
  AppendID: declares one DATA input port ("input") and one DATA output port
  ("output"). The metas section binds it to xflow "AppendID" in project
  "algo_public".
-->
<algorithm codeName="AppendID">
  <public>false</public>
  <reloadFields>false</reloadFields>
  <fieldChangable>false</fieldChangable>
  <params>
    <!-- Table name / partition params, each wired to an exporter and a port. -->
    <param name="inputTableName">
      <exporter>get_input_table_name</exporter>
      <inputName>input</inputName>
    </param>
    <param name="inputPartitions">
      <exporter>get_input_partitions</exporter>
      <inputName>input</inputName>
    </param>
    <param name="outputTableName">
      <exporter>get_output_table_name</exporter>
      <outputName>output</outputName>
    </param>
    <param name="outputPartition">
      <exporter>get_output_table_partitions</exporter>
      <outputName>output</outputName>
    </param>
    <!-- Carries the literal value "append_id" and the alias "idCol". -->
    <param name="IDColName">
      <alias>idCol</alias>
      <value>append_id</value>
    </param>
    <!-- Uses a package-qualified exporter ($package_root prefix) instead of a bare name. -->
    <param name="selectedColNames">
      <exporter>$package_root.preprocess._customize.get_append_id_selected_cols</exporter>
      <alias>selectedCols</alias>
      <inputName>input</inputName>
    </param>
  </params>
  <ports>
    <port name="input">
      <ioType>INPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
    </port>
    <port name="output">
      <ioType>OUTPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
      <!--
        The placeholders appear to reference the params of the same name.
        NOTE(review): the trailing colon after "bigint" is kept exactly as
        found; confirm whether the schema parser expects an empty third
        field here.
      -->
      <schema>
        <schema>{IDColName}: bigint:, {selectedColNames}</schema>
      </schema>
    </port>
  </ports>
  <metas>
    <meta name="xflowName" value="AppendID"/>
    <meta name="xflowProjectName" value="algo_public"/>
  </metas>
</algorithm>
<!--
  Split: declares one DATA input port ("input") and two DATA output ports
  ("output1" with sequence 1, "output2" with sequence 2). The metas section
  binds it to xflow "Split" in project "algo_public".
-->
<algorithm codeName="Split">
  <public>false</public>
  <reloadFields>false</reloadFields>
  <fieldChangable>false</fieldChangable>
  <params>
    <!-- Input side. -->
    <param name="inputTableName">
      <exporter>get_input_table_name</exporter>
      <inputName>input</inputName>
    </param>
    <param name="inputTablePartitions">
      <exporter>get_input_partitions</exporter>
      <inputName>input</inputName>
    </param>
    <!-- First output: table name and partition. -->
    <param name="output1TableName">
      <exporter>get_output_table_name</exporter>
      <outputName>output1</outputName>
    </param>
    <param name="output1TablePartition">
      <exporter>get_output_table_partitions</exporter>
      <outputName>output1</outputName>
    </param>
    <!-- Second output: table name and partition. -->
    <param name="output2TableName">
      <exporter>get_output_table_name</exporter>
      <outputName>output2</outputName>
    </param>
    <param name="output2TablePartition">
      <exporter>get_output_table_partitions</exporter>
      <outputName>output2</outputName>
    </param>
    <!-- Bare params: no exporter, alias or default value is declared for these here. -->
    <param name="fraction"/>
    <param name="randomSeed"/>
  </params>
  <ports>
    <port name="input">
      <ioType>INPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
    </port>
    <port name="output1">
      <ioType>OUTPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
    </port>
    <port name="output2">
      <ioType>OUTPUT</ioType>
      <sequence>2</sequence>
      <type>DATA</type>
    </port>
  </ports>
  <metas>
    <meta name="xflowName" value="Split"/>
    <meta name="xflowProjectName" value="algo_public"/>
  </metas>
</algorithm>
<!--
  RandomSample: declares one DATA input port ("input") and one DATA output
  port ("output"). The metas section binds it to xflow "RandomSample" in
  project "algo_public".
  NOTE(review): no output partition param is declared here, unlike AppendID
  and Split in this file; confirm that this is intentional.
-->
<algorithm codeName="RandomSample">
  <public>false</public>
  <reloadFields>false</reloadFields>
  <fieldChangable>false</fieldChangable>
  <params>
    <param name="inputTableName">
      <exporter>get_input_table_name</exporter>
      <inputName>input</inputName>
    </param>
    <param name="inputTablePartitions">
      <exporter>get_input_partitions</exporter>
      <inputName>input</inputName>
    </param>
    <param name="outputTableName">
      <exporter>get_output_table_name</exporter>
      <outputName>output</outputName>
    </param>
    <!-- Bare params: no exporter, alias or default value is declared for these here. -->
    <param name="sampleSize"/>
    <param name="sampleRatio"/>
    <param name="replace"/>
    <param name="randomSeed"/>
  </params>
  <ports>
    <port name="input">
      <ioType>INPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
    </port>
    <port name="output">
      <ioType>OUTPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
    </port>
  </ports>
  <metas>
    <meta name="xflowName" value="RandomSample"/>
    <meta name="xflowProjectName" value="algo_public"/>
  </metas>
</algorithm>
<!--
  WeightedSample: declares one DATA input port ("input") and one DATA output
  port ("output"). Compared with RandomSample in this file, it declares one
  extra bare param, "probCol". The metas section binds it to xflow
  "WeightedSample" in project "algo_public".
  NOTE(review): no output partition param is declared here; confirm that
  this is intentional.
-->
<algorithm codeName="WeightedSample">
  <public>false</public>
  <reloadFields>false</reloadFields>
  <fieldChangable>false</fieldChangable>
  <params>
    <param name="inputTableName">
      <exporter>get_input_table_name</exporter>
      <inputName>input</inputName>
    </param>
    <param name="inputTablePartitions">
      <exporter>get_input_partitions</exporter>
      <inputName>input</inputName>
    </param>
    <param name="outputTableName">
      <exporter>get_output_table_name</exporter>
      <outputName>output</outputName>
    </param>
    <!-- Bare params: no exporter, alias or default value is declared for these here. -->
    <param name="sampleSize"/>
    <param name="sampleRatio"/>
    <param name="probCol"/>
    <param name="replace"/>
    <param name="randomSeed"/>
  </params>
  <ports>
    <port name="input">
      <ioType>INPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
    </port>
    <port name="output">
      <ioType>OUTPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
    </port>
  </ports>
  <metas>
    <meta name="xflowName" value="WeightedSample"/>
    <meta name="xflowProjectName" value="algo_public"/>
  </metas>
</algorithm>
<!--
  StratifiedSample: declares one DATA input port ("input") and one DATA
  output port ("output"). The metas section binds it to xflow
  "StratifiedSample" in project "algo_public".
  NOTE(review): no output partition param is declared here; confirm that
  this is intentional.
-->
<algorithm codeName="StratifiedSample">
  <public>false</public>
  <reloadFields>false</reloadFields>
  <fieldChangable>false</fieldChangable>
  <params>
    <param name="inputTableName">
      <exporter>get_input_table_name</exporter>
      <inputName>input</inputName>
    </param>
    <param name="inputTablePartitions">
      <exporter>get_input_partitions</exporter>
      <inputName>input</inputName>
    </param>
    <param name="outputTableName">
      <exporter>get_output_table_name</exporter>
      <outputName>output</outputName>
    </param>
    <!-- Bare params: no exporter, alias or default value is declared for these here. -->
    <param name="sampleSize"/>
    <param name="sampleRatio"/>
    <param name="strataColName"/>
    <param name="randomSeed"/>
  </params>
  <ports>
    <port name="input">
      <ioType>INPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
    </port>
    <port name="output">
      <ioType>OUTPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
    </port>
  </ports>
  <metas>
    <meta name="xflowName" value="StratifiedSample"/>
    <meta name="xflowProjectName" value="algo_public"/>
  </metas>
</algorithm>
<!--
  binning: declares two DATA input ports ("feature" with sequence 1, "bins"
  with sequence 2) and one DATA output port ("output") whose schema is fixed
  to two string columns. The metas section binds it to xflow "binning" in
  project "algo_public".
  NOTE(review): this entry carries an "enabled" flag set to false and has no
  "public" element, unlike the sampling algorithms in this file; confirm how
  the loader treats the two flags.
-->
<algorithm codeName="binning">
  <enabled>false</enabled>
  <reloadFields>false</reloadFields>
  <fieldChangable>false</fieldChangable>
  <params>
    <!-- Params wired to the "feature" input port and the "output" port. -->
    <param name="inputTableName">
      <exporter>get_input_table_name</exporter>
      <inputName>feature</inputName>
    </param>
    <param name="inputPartitions">
      <exporter>get_input_partitions</exporter>
      <inputName>feature</inputName>
    </param>
    <param name="outputTableName">
      <exporter>get_output_table_name</exporter>
      <outputName>output</outputName>
    </param>
    <param name="selectedColNames">
      <alias>cols</alias>
      <exporter>get_feature_columns</exporter>
      <inputName>feature</inputName>
    </param>
    <param name="labelColName">
      <alias>labelCol</alias>
      <exporter>get_label_column</exporter>
      <inputName>feature</inputName>
    </param>
    <!-- Params with literal values declared in this file. -->
    <param name="nDivide">
      <value>10</value>
    </param>
    <param name="isLeftOpen"/>
    <param name="stringThreshold">
      <value>-1</value>
    </param>
    <param name="positiveLabel">
      <value>1</value>
    </param>
    <!-- Table name taken from the second input port, "bins". -->
    <param name="inputBinTableName">
      <exporter>get_input_table_name</exporter>
      <inputName>bins</inputName>
    </param>
    <param name="binningMethod"/>
  </params>
  <ports>
    <port name="feature">
      <ioType>INPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
    </port>
    <port name="bins">
      <ioType>INPUT</ioType>
      <sequence>2</sequence>
      <type>DATA</type>
    </port>
    <port name="output">
      <ioType>OUTPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
      <schema>
        <schema>feature: string, json: string</schema>
      </schema>
    </port>
  </ports>
  <metas>
    <meta name="xflowName" value="binning"/>
    <meta name="xflowProjectName" value="algo_public"/>
  </metas>
</algorithm>
<!--
  binning_training: declares two DATA input ports ("feature", "bins") and one
  MODEL output port ("output") of type TablesModel with two named schemas,
  "model" and "bins". The metas section binds it to xflow "linear_model" in
  project "algo_public" and names "binning_predict" as its predictor.
-->
<algorithm codeName="binning_training">
<!-- Presumably overrides the baseClass attribute set on the root element; confirm against the loader. -->
<baseClass>BaseTrainingAlgorithm</baseClass>
<reloadFields>false</reloadFields>
<fieldChangable>false</fieldChangable>
<enabled>false</enabled>
<params>
<!-- Column params wired to the "feature" input port. -->
<param name="featureColNames">
<alias>featureCols</alias>
<exporter>get_feature_columns</exporter>
<inputName>feature</inputName>
</param>
<param name="labelColName">
<alias>labelCol</alias>
<exporter>get_label_column</exporter>
<inputName>feature</inputName>
</param>
<!-- Params with literal values declared in this file. -->
<param name="optimization">
<value>barrier_method</value>
</param>
<param name="loss">
<value>logistic_regression</value>
</param>
<param name="iterations">
<value>100</value>
</param>
<param name="l1Weight">
<value>0</value>
</param>
<param name="l2Weight">
<value>0</value>
</param>
<param name="m">
<value>10</value>
</param>
<param name="convergenceTolerance">
<value>0.000001</value>
</param>
<param name="positiveLabel">
<value>1</value>
</param>
<!-- The only param in this file that carries a required="true" attribute. -->
<param name="inputTableName" required="true">
<exporter>get_input_table_name</exporter>
<inputName>feature</inputName>
</param>
<param name="inputTablePartitions">
<exporter>get_input_partitions</exporter>
<inputName>feature</inputName>
</param>
<!-- The exporter is written with an argument; "model" matches the schema name declared on the output port below. -->
<param name="outputTableName">
<exporter>get_output_model_table_name(table_name=model)</exporter>
<outputName>output</outputName>
</param>
<param name="inputBinTableName">
<exporter>get_input_table_name</exporter>
<inputName>bins</inputName>
</param>
</params>
<ports>
<port name="feature">
<ioType>INPUT</ioType>
<sequence>1</sequence>
<type>DATA</type>
</port>
<port name="bins">
<ioType>INPUT</ioType>
<sequence>2</sequence>
<type>DATA</type>
</port>
<port name="output">
<ioType>OUTPUT</ioType>
<sequence>1</sequence>
<type>MODEL</type>
<model>
<type>TablesModel</type>
<schemas>
<!--
  NOTE(review): the schema text below spans several lines, so its line breaks
  are part of the element text; keep it unchanged unless the parser is known
  to normalise whitespace. Column names such as "constribution",
  "percetage_pos" and "percetage_neg" look misspelled but presumably mirror
  the columns produced by the backing algorithm; do not rename them without
  confirming.
-->
<schema name="model">
<schema>feaname: string, binid: bigint, bin: string, constraint: string, weight: double,
scaled_weight: bigint, woe: double, constribution: double, total: bigint, positive:
bigint, negative: bigint, percetage_pos: double, percetage_neg: double
</schema>
</schema>
<!-- Both child elements name "bins", which is also the name of the second input port. -->
<schema name="bins">
<copyInput>bins</copyInput>
<directCopy>bins</directCopy>
</schema>
</schemas>
</model>
</port>
</ports>
<metas>
<!-- The xflow name differs from the codeName of this entry. -->
<meta name="xflowName" value="linear_model"/>
<meta name="xflowProjectName" value="algo_public"/>
<!-- Matches the codeName of the binning_predict entry in this file. -->
<meta name="predictor" value="binning_predict"/>
</metas>
</algorithm>
<!--
  binning_predict: declares a MODEL input port ("model", sequence 1), a DATA
  input port ("feature", sequence 2) and one DATA output port ("output").
  The metas section binds it to xflow "lm_predict" in project "algo_public".
  NOTE(review): this entry has no "fieldChangable" element, unlike the other
  algorithms in this file; confirm that the loader default is acceptable.
-->
<algorithm codeName="binning_predict">
  <public>false</public>
  <reloadFields>false</reloadFields>
  <enabled>false</enabled>
  <params>
    <!-- Params wired to the "feature" input port. -->
    <param name="inputFeatureTableName">
      <exporter>get_input_table_name</exporter>
      <inputName>feature</inputName>
    </param>
    <param name="inputFeatureTablePartitions">
      <exporter>get_input_partitions</exporter>
      <inputName>feature</inputName>
    </param>
    <!-- Wired to the "model" input port; the exporter is written with table_name=bins. -->
    <param name="inputBinTableName">
      <exporter>get_input_model_table_name(table_name=bins)</exporter>
      <inputName>model</inputName>
    </param>
    <param name="outputTableName">
      <exporter>get_output_table_name</exporter>
      <outputName>output</outputName>
    </param>
    <!-- Wired to the "model" input port; the exporter is written with table_name=model. -->
    <param name="inputModelTableName">
      <exporter>get_input_model_table_name(table_name=model)</exporter>
      <inputName>model</inputName>
    </param>
    <param name="featureColNames">
      <alias>featureCols</alias>
      <exporter>get_feature_columns</exporter>
      <inputName>feature</inputName>
    </param>
    <param name="metaColNames">
      <exporter>get_original_columns</exporter>
      <inputName>feature</inputName>
    </param>
  </params>
  <ports>
    <port name="model">
      <ioType>INPUT</ioType>
      <sequence>1</sequence>
      <type>MODEL</type>
    </port>
    <port name="feature">
      <ioType>INPUT</ioType>
      <sequence>2</sequence>
      <type>DATA</type>
    </port>
    <port name="output">
      <ioType>OUTPUT</ioType>
      <sequence>1</sequence>
      <type>DATA</type>
      <!-- The placeholder appears to reference the metaColNames param; three prediction columns follow it. -->
      <schema>
        <schema>{metaColNames}, prediction_score: double, prediction_prob: double, prediction_detail: string</schema>
      </schema>
    </port>
  </ports>
  <metas>
    <meta name="xflowName" value="lm_predict"/>
    <meta name="xflowProjectName" value="algo_public"/>
  </metas>
</algorithm>
</algorithms>