pai_jobs/easy_rec_flow/easy_rec.xml (215 lines of code) (raw):
<?xml version="1.0" encoding="UTF-8"?>
<xflow xmlns="odps:xflow:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="easy_rec_ext" category="deep_learning" ref_resource="xxxxxxxx/resources/xxxxxxxx" comments="easy-rec" catalog="String" xsi:schemaLocation="">
<parameters>
  <!-- Entry script and pipeline config: script and entryFile are read by getEntry; config is read by getEntry and getHyperParams. -->
  <parameter name="config" use="optional" default=""/>
  <parameter name="script" use="optional" default=""/>
  <parameter name="entryFile" use="optional" default="run.py"/>
  <!-- Forwarded as-is to the runTF sub-workflow. buckets is the only parameter marked required. -->
  <parameter name="volumes" use="optional" default=""/>
  <parameter name="buckets" use="required" default=""/>
  <!-- tables is read by parseTable. NOTE(review): outputs is not referenced by any action in this file; runTF uses the outputs value emitted by getHyperParams. Confirm that this is intended. -->
  <parameter name="tables" use="optional" default=""/>
  <parameter name="outputs" use="optional" default=""/>
  <!-- Job mode, defaulting to train. Other modes named by comments in this file: evaluate, export, predict, vector_retrieve. -->
  <parameter name="cmd" use="optional" default="train"/>
  <!-- Separate train / eval inputs and the boundary table; read by both parseTable and getHyperParams. -->
  <parameter name="train_tables" use="optional" default=""/>
  <parameter name="eval_tables" use="optional" default=""/>
  <parameter name="boundary_table" use="optional" default=""/>
  <parameter name="continue_train" use="optional" default="true"/>
  <parameter name="export_dir" use="optional" default=""/>
  <!-- Resource and cluster settings consumed by getHyperParams. cluster is the only parameter declared without a default attribute. -->
  <parameter name="cluster" use="optional"/>
  <parameter name="gpuRequired" use="optional" default=""/>
  <parameter name="cpuRequired" use="optional" default="1200"/>
  <parameter name="memRequired" use="optional" default="30000"/>
  <parameter name="with_evaluator" use="optional" default=""/>
  <parameter name="eval_method" use="optional" default="separate"/>
  <parameter name="distribute_strategy" use="optional" default=""/>
  <!-- for train only -->
  <parameter name="edit_config_json" use="optional" default=""/>
  <!-- for evaluate only. NOTE(review): fine_tune_checkpoint is grouped here although its name suggests a training option; confirm. -->
  <parameter name="checkpoint_path" use="optional" default=""/>
  <parameter name="fine_tune_checkpoint" use="optional" default=""/>
  <parameter name="eval_result_path" use="optional" default="eval_result.txt"/>
  <!-- for cmd=predict only -->
  <parameter name="saved_model_dir" use="optional" default=""/>
  <parameter name="input_table" use="optional" default=""/>
  <parameter name="output_table" use="optional" default=""/>
  <parameter name="selected_cols" use="optional" default=""/>
  <parameter name="excluded_cols" use="optional" default=""/>
  <!-- set reserved_cols to ALL_COLUMNS to include all columns in the final set -->
  <parameter name="reserved_cols" use="optional" default=""/>
  <parameter name="lifecycle" use="optional" default="10"/>
  <parameter name="output_cols" use="optional" default="probs double"/>
  <parameter name="model_outputs" use="optional" default=""/>
  <parameter name="batch_size" use="optional" default="32"/>
  <!-- for cmd=vector_retrieve only -->
  <parameter name="query_table" use="optional" default=""/>
  <parameter name="doc_table" use="optional" default=""/>
  <parameter name="knn_distance" use="optional" default="inner_product"/>
  <parameter name="knn_num_neighbours" use="optional" default=""/>
  <parameter name="knn_feature_dims" use="optional" default=""/>
  <parameter name="knn_index_type" use="optional" default="ivfflat"/>
  <parameter name="knn_feature_delimiter" use="optional" default=","/>
  <parameter name="knn_nlist" use="optional" default=""/>
  <parameter name="knn_nprobe" use="optional" default=""/>
  <parameter name="knn_compress_dim" use="optional" default=""/>
  <!-- for hyperparameter tuning -->
  <parameter name="model_dir" use="optional" default=""/>
  <parameter name="hpo_param_path" use="optional" default=""/>
  <parameter name="hpo_metric_save_path" use="optional" default=""/>
  <parameter name="profiling_file" use="optional" default=""/>
  <!-- resources and version control; both values are read by getEntry -->
  <parameter name="version" use="optional" default="stable"/>
  <parameter name="res_project" use="optional" default="algo_public"/>
  <!-- feature masking during eval -->
  <parameter name="mask_feature_name" use="optional" default=""/>
  <!-- for train/evaluate/export/predict -->
  <parameter name="extra_params" use="optional" default=""/>
  <!-- The remaining switches are handed straight to the runTF sub-workflow. allOrNothing is kept commented out. -->
  <parameter name="useSparseClusterSchema" use="optional" default="false"/>
  <parameter name="autoEnablePsTaskFailover" use="optional" default="false"/>
  <!--<parameter name="allOrNothing" use="optional" default="false"/> -->
  <parameter name="maxTrainingTimeInHour" use="optional" default="0"/>
</parameters>
<workflow>
<!-- The workflow begins at the getEntry action. -->
<start to="getEntry"/>
<!-- Step 1: call the getEntry script function (implementation not visible in this file) to resolve the script and entry file that runTF will launch. -->
<action name="getEntry">
  <script>
    <input_vars>
      <!-- User-supplied script and entry file; the resolved values come back as the script and entryFile outputs. -->
      <var name="script_in" value="${parameters.script}"/>
      <var name="entryFile_in" value="${parameters.entryFile}"/>
      <var name="config" value="${parameters.config}"/>
      <var name="cluster" value="${parameters.cluster}"/>
      <!-- Resource project and version selectors. -->
      <var name="res_project" value="${parameters.res_project}"/>
      <var name="version" value="${parameters.version}"/>
    </input_vars>
    <output_vars>
      <var name="script"/>
      <var name="entryFile"/>
    </output_vars>
    <function>getEntry</function>
  </script>
  <ok to="parseTable"/>
  <error to="failed"/>
</action>
<!-- Step 2: call the parseTable script function (implementation not visible in this file). Its outputs supply the SQL run by the next two actions and the column/table values read by getHyperParams. -->
<action name="parseTable">
  <script>
    <input_vars>
      <var name="cmd" value="${parameters.cmd}"/>
      <!-- Prediction input/output tables and column selection. -->
      <var name="inputTable" value="${parameters.input_table}"/>
      <var name="outputTable" value="${parameters.output_table}"/>
      <var name="selectedCols" value="${parameters.selected_cols}"/>
      <var name="excludedCols" value="${parameters.excluded_cols}"/>
      <var name="reservedCols" value="${parameters.reserved_cols}"/>
      <var name="lifecycle" value="${parameters.lifecycle}"/>
      <var name="outputCol" value="${parameters.output_cols}"/>
      <!-- Training / evaluation tables. -->
      <var name="tables" value="${parameters.tables}"/>
      <var name="trainTables" value="${parameters.train_tables}"/>
      <var name="evalTables" value="${parameters.eval_tables}"/>
      <var name="boundaryTable" value="${parameters.boundary_table}"/>
      <!-- Vector retrieval tables. -->
      <var name="queryTable" value="${parameters.query_table}"/>
      <var name="docTable" value="${parameters.doc_table}"/>
    </input_vars>
    <output_vars>
      <var name="all_cols"/>
      <var name="all_col_types"/>
      <var name="selected_cols"/>
      <var name="reserved_cols"/>
      <var name="create_table_sql"/>
      <var name="add_partition_sql"/>
      <var name="tables"/>
    </output_vars>
    <function>parseTable</function>
  </script>
  <ok to="createPredictTable"/>
  <error to="failed"/>
</action>
<!-- Step 3: execute the create_table_sql statement produced by parseTable. This action is the only success transition out of parseTable, so it runs whatever the value of cmd; the SQL text is decided entirely by parseTable. -->
<action name="createPredictTable">
  <SQL>
    <sql>${workflow.parseTable.create_table_sql}</sql>
  </SQL>
  <ok to="addPartition"/>
  <error to="failed"/>
</action>
<!-- Step 4: execute the add_partition_sql statement produced by parseTable, then continue to getHyperParams. -->
<action name="addPartition">
  <SQL>
    <sql>${workflow.parseTable.add_partition_sql}</sql>
  </SQL>
  <ok to="getHyperParams"/>
  <error to="failed"/>
</action>
<!-- Step 5: call the getHyperParams script function (implementation not visible in this file). It receives the user parameters plus the parseTable outputs and emits the hyperParameters, cluster, tables and outputs values consumed by runTF. -->
<action name="getHyperParams">
  <script>
    <input_vars>
      <!-- The order of these variables is kept exactly as originally declared. NOTE(review): the function may bind them by position; confirm before reordering. -->
      <var name="config" value="${parameters.config}"/>
      <var name="cmd" value="${parameters.cmd}"/>
      <var name="checkpoint_path" value="${parameters.checkpoint_path}"/>
      <var name="fine_tune_checkpoint" value="${parameters.fine_tune_checkpoint}"/>
      <var name="eval_result_path" value="${parameters.eval_result_path}"/>
      <var name="export_dir" value="${parameters.export_dir}"/>
      <var name="gpuRequired" value="${parameters.gpuRequired}"/>
      <var name="cpuRequired" value="${parameters.cpuRequired}"/>
      <var name="memRequired" value="${parameters.memRequired}"/>
      <var name="cluster" value="${parameters.cluster}"/>
      <var name="continue_train" value="${parameters.continue_train}"/>
      <var name="distribute_strategy" value="${parameters.distribute_strategy}"/>
      <var name="with_evaluator" value="${parameters.with_evaluator}"/>
      <var name="eval_method" value="${parameters.eval_method}"/>
      <!-- pipeline_config edits supplied on the command line -->
      <var name="edit_config_json" value="${parameters.edit_config_json}"/>
      <!-- for train/evaluate/predict; taken from parseTable rather than from the raw parameter -->
      <var name="selected_cols" value="${workflow.parseTable.selected_cols}"/>
      <!-- for hyperparameter tuning -->
      <var name="model_dir" value="${parameters.model_dir}"/>
      <var name="hpo_param_path" value="${parameters.hpo_param_path}"/>
      <var name="hpo_metric_save_path" value="${parameters.hpo_metric_save_path}"/>
      <!-- for cmd=predict only; all_cols, all_col_types, reserved_cols and tables are parseTable outputs -->
      <var name="saved_model_dir" value="${parameters.saved_model_dir}"/>
      <var name="all_cols" value="${workflow.parseTable.all_cols}"/>
      <var name="all_col_types" value="${workflow.parseTable.all_col_types}"/>
      <var name="reserved_cols" value="${workflow.parseTable.reserved_cols}"/>
      <var name="output_cols" value="${parameters.output_cols}"/>
      <var name="model_outputs" value="${parameters.model_outputs}"/>
      <var name="input_table" value="${parameters.input_table}"/>
      <var name="output_table" value="${parameters.output_table}"/>
      <var name="tables" value="${workflow.parseTable.tables}"/>
      <!-- for cmd=vector_retrieve only -->
      <var name="query_table" value="${parameters.query_table}"/>
      <var name="doc_table" value="${parameters.doc_table}"/>
      <var name="knn_distance" value="${parameters.knn_distance}"/>
      <var name="knn_num_neighbours" value="${parameters.knn_num_neighbours}"/>
      <var name="knn_feature_dims" value="${parameters.knn_feature_dims}"/>
      <var name="knn_index_type" value="${parameters.knn_index_type}"/>
      <var name="knn_feature_delimiter" value="${parameters.knn_feature_delimiter}"/>
      <var name="knn_nlist" value="${parameters.knn_nlist}"/>
      <var name="knn_nprobe" value="${parameters.knn_nprobe}"/>
      <var name="knn_compress_dim" value="${parameters.knn_compress_dim}"/>
      <!-- separate train and test tables -->
      <var name="train_tables" value="${parameters.train_tables}"/>
      <var name="eval_tables" value="${parameters.eval_tables}"/>
      <var name="boundary_table" value="${parameters.boundary_table}"/>
      <var name="batch_size" value="${parameters.batch_size}"/>
      <!-- file for saving predict timeline stats -->
      <var name="profiling_file" value="${parameters.profiling_file}"/>
      <!-- feature to mask during eval -->
      <var name="mask_feature_name" value="${parameters.mask_feature_name}"/>
      <!-- free-form extra parameters -->
      <var name="extra_params" value="${parameters.extra_params}"/>
    </input_vars>
    <output_vars>
      <var name="hyperParameters"/>
      <var name="cluster"/>
      <var name="tables"/>
      <var name="outputs"/>
    </output_vars>
    <function>getHyperParams</function>
  </script>
  <ok to="runTF"/>
  <error to="failed"/>
</action>
<!-- Step 6: launch the tensorflow1120 sub-workflow. The project is the literal algo_public; the res_project parameter is only passed to getEntry and is not used here. -->
<action name="runTF">
  <sub-workflow>
    <parameters>
      <!-- Entry point resolved by getEntry. -->
      <p name="script" value="${workflow.getEntry.script}"/>
      <p name="entryFile" value="${workflow.getEntry.entryFile}"/>
      <!-- Storage settings taken directly from the user parameters. -->
      <p name="volumes" value="${parameters.volumes}"/>
      <p name="buckets" value="${parameters.buckets}"/>
      <!-- Values computed by getHyperParams; userDefinedParameters carries its hyperParameters output. -->
      <p name="tables" value="${workflow.getHyperParams.tables}"/>
      <p name="outputs" value="${workflow.getHyperParams.outputs}"/>
      <p name="cluster" value="${workflow.getHyperParams.cluster}"/>
      <!-- gpuRequired is the only resource parameter forwarded directly; cpuRequired and memRequired are passed only to getHyperParams. -->
      <p name="gpuRequired" value="${parameters.gpuRequired}"/>
      <p name="userDefinedParameters" value="${workflow.getHyperParams.hyperParameters}"/>
      <!-- Scheduling switches passed through unchanged; allOrNothing is kept commented out. -->
      <p name="useSparseClusterSchema" value="${parameters.useSparseClusterSchema}"/>
      <p name="autoEnablePsTaskFailover" value="${parameters.autoEnablePsTaskFailover}"/>
      <!--<p name="allOrNothing" value="${parameters.allOrNothing}"/>-->
      <p name="maxTrainingTimeInHour" value="${parameters.maxTrainingTimeInHour}"/>
    </parameters>
    <name>tensorflow1120</name>
    <project>algo_public</project>
  </sub-workflow>
  <ok to="end"/>
  <error to="failed"/>
</action>
<!-- Shared failure terminal: the error transition of every action in this workflow points here. Reports code 1 with a fixed message. -->
<fail name="failed">
  <code>1</code>
  <message>job failed</message>
</fail>
<!-- Success terminal, reached from runTF. -->
<end name="end"/>
</workflow>
</xflow>